!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief given the response wavefunctions obtained by the application
!>      of the (rxp), p, and ((dk-dl)xp) operators,
!>      here the current density vector (jx, jy, jz)
!>      is computed for the 3 directions of the magnetic field (Bx, By, Bz)
!> \par History
!>      created 02-2006 [MI]
!> \author MI
! **************************************************************************************************
MODULE qs_linres_current
   USE ao_util,                         ONLY: exp_radius_very_extended
   USE basis_set_types,                 ONLY: get_gto_basis_set,&
                                              gto_basis_set_p_type,&
                                              gto_basis_set_type
   USE cell_types,                      ONLY: cell_type,&
                                              pbc
   USE cp_array_utils,                  ONLY: cp_2d_i_p_type,&
                                              cp_2d_r_p_type
   USE cp_control_types,                ONLY: dft_control_type
   USE cp_dbcsr_api,                    ONLY: &
        dbcsr_convert_offsets_to_sizes, dbcsr_copy, dbcsr_create, dbcsr_deallocate_matrix, &
        dbcsr_distribution_type, dbcsr_finalize, dbcsr_get_block_p, dbcsr_p_type, dbcsr_put_block, &
        dbcsr_set, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry
   USE cp_dbcsr_cp2k_link,              ONLY: cp_dbcsr_alloc_block_from_nbl
   USE cp_dbcsr_operations,             ONLY: cp_dbcsr_plus_fm_fm_t,&
                                              cp_dbcsr_sm_fm_multiply,&
                                              dbcsr_allocate_matrix_set,&
                                              dbcsr_deallocate_matrix_set
   USE cp_fm_basic_linalg,              ONLY: cp_fm_scale_and_add,&
                                              cp_fm_trace
   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
                                              cp_fm_struct_release,&
                                              cp_fm_struct_type
   USE cp_fm_types,                     ONLY: cp_fm_create,&
                                              cp_fm_release,&
                                              cp_fm_set_all,&
                                              cp_fm_to_fm,&
                                              cp_fm_type
   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
                                              cp_logger_get_default_io_unit,&
                                              cp_logger_type,&
                                              cp_to_string
   USE cp_output_handling,              ONLY: cp_p_file,&
                                              cp_print_key_finished_output,&
                                              cp_print_key_should_output,&
                                              cp_print_key_unit_nr
   USE cp_realspace_grid_cube,          ONLY: cp_pw_to_cube
   USE cube_utils,                      ONLY: compute_cube_center,&
                                              cube_info_type,&
                                              return_cube
   USE gaussian_gridlevels,             ONLY: gridlevel_info_type
   USE grid_api,                        ONLY: &
        GRID_FUNC_AB, GRID_FUNC_ADBmDAB_X, GRID_FUNC_ADBmDAB_Y, GRID_FUNC_ADBmDAB_Z, &
        GRID_FUNC_ARDBmDARB_XX, GRID_FUNC_ARDBmDARB_XY, GRID_FUNC_ARDBmDARB_XZ, &
        GRID_FUNC_ARDBmDARB_YX, GRID_FUNC_ARDBmDARB_YY, GRID_FUNC_ARDBmDARB_YZ, &
        GRID_FUNC_ARDBmDARB_ZX, GRID_FUNC_ARDBmDARB_ZY, GRID_FUNC_ARDBmDARB_ZZ, &
        collocate_pgf_product
   USE input_constants,                 ONLY: current_gauge_atom
   USE input_section_types,             ONLY: section_get_ivals,&
                                              section_get_lval,&
                                              section_vals_get_subs_vals,&
                                              section_vals_type
   USE kinds,                           ONLY: default_path_length,&
                                              default_string_length,&
                                              dp
   USE mathconstants,                   ONLY: twopi
   USE memory_utilities,                ONLY: reallocate
   USE message_passing,                 ONLY: mp_para_env_type
   USE orbital_pointers,                ONLY: ncoset
   USE particle_list_types,             ONLY: particle_list_type
   USE particle_methods,                ONLY: get_particle_set
   USE particle_types,                  ONLY: particle_type
   USE pw_env_types,                    ONLY: pw_env_get,&
                                              pw_env_type
   USE pw_methods,                      ONLY: pw_axpy,&
                                              pw_integrate_function,&
                                              pw_scale,&
                                              pw_zero
   USE pw_pool_types,                   ONLY: pw_pool_type
   USE pw_types,                        ONLY: pw_c1d_gs_type,&
                                              pw_r3d_rs_type
   USE qs_environment_types,            ONLY: get_qs_env,&
                                              qs_environment_type
   USE qs_kind_types,                   ONLY: get_qs_kind,&
                                              get_qs_kind_set,&
                                              qs_kind_type
   USE qs_linres_atom_current,          ONLY: calculate_jrho_atom,&
                                              calculate_jrho_atom_coeff,&
                                              calculate_jrho_atom_rad
   USE qs_linres_op,                    ONLY: fac_vecp,&
                                              fm_scale_by_pbc_AC,&
                                              ind_m2,&
                                              set_vecp,&
                                              set_vecp_rev
   USE qs_linres_types,                 ONLY: current_env_type,&
                                              get_current_env
   USE qs_matrix_pools,                 ONLY: qs_matrix_pools_type
   USE qs_mo_types,                     ONLY: get_mo_set,&
                                              mo_set_type
   USE qs_neighbor_list_types,          ONLY: get_iterator_info,&
                                              neighbor_list_iterate,&
                                              neighbor_list_iterator_create,&
                                              neighbor_list_iterator_p_type,&
                                              neighbor_list_iterator_release,&
                                              neighbor_list_set_p_type
   USE qs_operators_ao,                 ONLY: build_lin_mom_matrix,&
                                              rRc_xyz_der_ao
   USE qs_rho_types,                    ONLY: qs_rho_get
   USE qs_subsys_types,                 ONLY: qs_subsys_get,&
                                              qs_subsys_type
   USE realspace_grid_types,            ONLY: realspace_grid_desc_p_type,&
                                              realspace_grid_desc_type,&
                                              realspace_grid_type,&
                                              rs_grid_create,&
                                              rs_grid_mult_and_add,&
                                              rs_grid_release,&
                                              rs_grid_zero
   USE rs_pw_interface,                 ONLY: density_rs2pw
   USE task_list_methods,               ONLY: distribute_tasks,&
                                              rs_distribute_matrix,&
                                              task_list_inner_loop
   USE task_list_types,                 ONLY: atom_pair_type,&
                                              reallocate_tasks,&
                                              task_type
#include "./base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   ! *** Public subroutines ***
   PUBLIC :: current_build_current, current_build_chi, calculate_jrho_resp

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_linres_current'

   TYPE box_type
      INTEGER :: n = -1
      REAL(dp), POINTER, DIMENSION(:, :) :: r => NULL()
   END TYPE box_type
   REAL(dp), DIMENSION(3, 3, 3), PARAMETER  :: Levi_Civita = RESHAPE([ &
                                                          0.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, -1.0_dp, 0.0_dp, 1.0_dp, 0.0_dp, &
                                                          0.0_dp, 0.0_dp, 1.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, -1.0_dp, 0.0_dp, 0.0_dp, &
                                                0.0_dp, -1.0_dp, 0.0_dp, 1.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 0.0_dp], [3, 3, 3])

CONTAINS

! **************************************************************************************************
!> \brief First calculate the density matrixes, for each component of the current
!>       they are 3 because of the r dependent terms
!>       Next it collocates on the grid to have J(r)
!>       In the GAPW case one need to collocate on the PW grid only the soft part
!>       while the rest goes on Lebedev grids
!>       The contributions to the shift and to the susceptibility will be
!>       calculated separately and added only at the end
!>       The calculation of the shift tensor is performed on the position of the atoms
!>       and on other selected points in real space summing up the contributions
!>       from the PW grid current density and the local densities
!>       Spline interpolation is used
!> \param current_env ...
!> \param qs_env ...
!> \param iB ...
!> \author MI
!> \note
!>       The susceptibility is needed to compute the G=0 term of the shift
!>       in reciprocal space. \chi_{ij} = \int (r x Jj)_i
!>       (where Jj id the current density generated by the field in direction j)
!>       To calculate the susceptibility on the PW grids it is necessary to apply
!>       the position operator yet another time.
!>       This cannot be done on directly on the full J(r) because it is not localized
!>       Therefore it is done state by state (see linres_nmr_shift)
! **************************************************************************************************
   SUBROUTINE current_build_current(current_env, qs_env, iB)
      !
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      INTEGER, INTENT(IN)                                :: iB

      CHARACTER(LEN=*), PARAMETER :: routineN = 'current_build_current'

      CHARACTER(LEN=default_path_length)                 :: ext, filename, my_pos
      INTEGER                                            :: handle, idir, iiB, iiiB, ispin, istate, &
                                                            j, jstate, nao, natom, nmo, nspins, &
                                                            nstates(2), output_unit, unit_nr
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf
      INTEGER, DIMENSION(:), POINTER                     :: row_blk_sizes
      LOGICAL                                            :: append_cube, gapw, mpi_io
      REAL(dp)                                           :: dk(3), jrho_tot_G(3, 3), &
                                                            jrho_tot_R(3, 3), maxocc, scale_fac
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: ddk
      REAL(dp), EXTERNAL                                 :: DDOT
      TYPE(cell_type), POINTER                           :: cell
      TYPE(cp_2d_i_p_type), DIMENSION(:), POINTER        :: center_list
      TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: p_psi1, psi1
      TYPE(cp_fm_type), DIMENSION(:), POINTER            :: psi0_order
      TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: psi1_D, psi1_p, psi1_rxp
      TYPE(cp_fm_type), POINTER                          :: mo_coeff
      TYPE(cp_logger_type), POINTER                      :: logger
      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: density_matrix0, density_matrix_a, &
                                                            density_matrix_ii, density_matrix_iii
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
         POINTER                                         :: sab_all
      TYPE(particle_list_type), POINTER                  :: particles
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(pw_c1d_gs_type), DIMENSION(:), POINTER        :: jrho1_g
      TYPE(pw_env_type), POINTER                         :: pw_env
      TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
      TYPE(pw_r3d_rs_type)                               :: wf_r
      TYPE(pw_r3d_rs_type), DIMENSION(:), POINTER        :: jrho1_r
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
      TYPE(qs_matrix_pools_type), POINTER                :: mpools
      TYPE(qs_subsys_type), POINTER                      :: subsys
      TYPE(realspace_grid_desc_type), POINTER            :: auxbas_rs_desc
      TYPE(section_vals_type), POINTER                   :: current_section

      CALL timeset(routineN, handle)
      !
      NULLIFY (logger, current_section, density_matrix0, density_matrix_a, &
               density_matrix_ii, density_matrix_iii, cell, dft_control, mos, &
               particle_set, pw_env, auxbas_rs_desc, auxbas_pw_pool, &
               para_env, center_list, mo_coeff, jrho1_r, jrho1_g, &
               psi1_p, psi1_D, psi1_rxp, sab_all, qs_kind_set)

      logger => cp_get_default_logger()
      output_unit = cp_logger_get_default_io_unit(logger)
      !
      !
      CALL get_current_env(current_env=current_env, &
                           center_list=center_list, &
                           psi1_rxp=psi1_rxp, &
                           psi1_D=psi1_D, &
                           psi1_p=psi1_p, &
                           psi0_order=psi0_order, &
                           nstates=nstates, &
                           nao=nao)
      !
      !
      CALL get_qs_env(qs_env=qs_env, &
                      cell=cell, &
                      dft_control=dft_control, &
                      mos=mos, &
                      mpools=mpools, &
                      pw_env=pw_env, &
                      para_env=para_env, &
                      subsys=subsys, &
                      sab_all=sab_all, &
                      particle_set=particle_set, &
                      qs_kind_set=qs_kind_set, &
                      dbcsr_dist=dbcsr_dist)

      CALL qs_subsys_get(subsys, particles=particles)

      gapw = dft_control%qs_control%gapw
      nspins = dft_control%nspins
      natom = SIZE(particle_set, 1)
      !
      ! allocate temporary arrays
      ALLOCATE (psi1(nspins), p_psi1(nspins))
      DO ispin = 1, nspins
         CALL cp_fm_create(psi1(ispin), psi0_order(ispin)%matrix_struct)
         CALL cp_fm_create(p_psi1(ispin), psi0_order(ispin)%matrix_struct)
         CALL cp_fm_set_all(psi1(ispin), 0.0_dp)
         CALL cp_fm_set_all(p_psi1(ispin), 0.0_dp)
      END DO
      !
      !
      CALL dbcsr_allocate_matrix_set(density_matrix0, nspins)
      CALL dbcsr_allocate_matrix_set(density_matrix_a, nspins)
      CALL dbcsr_allocate_matrix_set(density_matrix_ii, nspins)
      CALL dbcsr_allocate_matrix_set(density_matrix_iii, nspins)
      !
      ! prepare for allocation
      ALLOCATE (first_sgf(natom))
      ALLOCATE (last_sgf(natom))
      CALL get_particle_set(particle_set, qs_kind_set, &
                            first_sgf=first_sgf, &
                            last_sgf=last_sgf)
      ALLOCATE (row_blk_sizes(natom))
      CALL dbcsr_convert_offsets_to_sizes(first_sgf, row_blk_sizes, last_sgf)
      DEALLOCATE (first_sgf)
      DEALLOCATE (last_sgf)
      !
      !
      DO ispin = 1, nspins
         !
         !density_matrix0
         ALLOCATE (density_matrix0(ispin)%matrix)
         CALL dbcsr_create(matrix=density_matrix0(ispin)%matrix, &
                           name="density_matrix0", &
                           dist=dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, &
                           row_blk_size=row_blk_sizes, col_blk_size=row_blk_sizes, &
                           mutable_work=.TRUE.)
         CALL cp_dbcsr_alloc_block_from_nbl(density_matrix0(ispin)%matrix, sab_all)
         !
         !density_matrix_a
         ALLOCATE (density_matrix_a(ispin)%matrix)
         CALL dbcsr_copy(density_matrix_a(ispin)%matrix, density_matrix0(ispin)%matrix, &
                         name="density_matrix_a")
         !
         !density_matrix_ii
         ALLOCATE (density_matrix_ii(ispin)%matrix)
         CALL dbcsr_copy(density_matrix_ii(ispin)%matrix, density_matrix0(ispin)%matrix, &
                         name="density_matrix_ii")
         !
         !density_matrix_iii
         ALLOCATE (density_matrix_iii(ispin)%matrix)
         CALL dbcsr_copy(density_matrix_iii(ispin)%matrix, density_matrix0(ispin)%matrix, &
                         name="density_matrix_iii")
      END DO
      !
      DEALLOCATE (row_blk_sizes)
      !
      !
      current_section => section_vals_get_subs_vals(qs_env%input, "PROPERTIES%LINRES%CURRENT")
      !
      !
      jrho_tot_G = 0.0_dp
      jrho_tot_R = 0.0_dp
      !
      ! Lets go!
      CALL set_vecp(iB, iiB, iiiB)
      DO ispin = 1, nspins
         nmo = nstates(ispin)
         mo_coeff => psi0_order(ispin)
         !maxocc = max_occ(ispin)
         !
         CALL get_mo_set(mo_set=mos(ispin), maxocc=maxocc)
         !
         !
         ! Build the first density matrix
         CALL dbcsr_set(density_matrix0(ispin)%matrix, 0.0_dp)
         CALL cp_dbcsr_plus_fm_fm_t(sparse_matrix=density_matrix0(ispin)%matrix, &
                                    matrix_v=mo_coeff, matrix_g=mo_coeff, &
                                    ncol=nmo, alpha=maxocc)
         !
         ! Allocate buffer vectors
         ALLOCATE (ddk(3, nmo))
         !
         ! Construct the 3 density matrices for the field in direction iB
         !
         ! First the full matrix psi_a_iB
         ASSOCIATE (psi_a_iB => psi1(ispin), psi_buf => p_psi1(ispin))
            CALL cp_fm_set_all(psi_a_iB, 0.0_dp)
            CALL cp_fm_set_all(psi_buf, 0.0_dp)
            ! psi_a_iB = - (R_\nu-dk)_ii psi1_piiiB + (R_\nu-dk)_iii psi1_piiB
            !
            ! contributions from the response psi1_p_ii and psi1_p_iii
            DO istate = 1, current_env%nbr_center(ispin)
               dk(1:3) = current_env%centers_set(ispin)%array(1:3, istate)
               !
               ! Copy the vector in the full matrix psi1
               !nstate_loc = center_list(ispin)%array(1,icenter+1)-center_list(ispin)%array(1,icenter)
               DO j = center_list(ispin)%array(1, istate), center_list(ispin)%array(1, istate + 1) - 1
                  jstate = center_list(ispin)%array(2, j)
                  CALL cp_fm_to_fm(psi1_p(ispin, iiB), psi_a_iB, 1, jstate, jstate)
                  CALL cp_fm_to_fm(psi1_p(ispin, iiiB), psi_buf, 1, jstate, jstate)
                  ddk(:, jstate) = dk(1:3)
               END DO
            END DO ! istate
            CALL fm_scale_by_pbc_AC(psi_a_iB, current_env%basisfun_center, ddk, cell, iiiB)
            CALL fm_scale_by_pbc_AC(psi_buf, current_env%basisfun_center, ddk, cell, iiB)
            CALL cp_fm_scale_and_add(-1.0_dp, psi_a_iB, 1.0_dp, psi_buf)
            !
            !psi_a_iB = psi_a_iB + psi1_rxp
            !
            ! contribution from the response psi1_rxp
            CALL cp_fm_scale_and_add(-1.0_dp, psi_a_iB, 1.0_dp, psi1_rxp(ispin, iB))
            !
            !psi_a_iB = psi_a_iB - psi1_D
            IF (current_env%full) THEN
               !
               ! contribution from the response psi1_D
               CALL cp_fm_scale_and_add(1.0_dp, psi_a_iB, -1.0_dp, psi1_D(ispin, iB))
            END IF
            !
            ! Multiply by the occupation number for the density matrix
            !
            ! Build the first density matrix
            CALL dbcsr_set(density_matrix_a(ispin)%matrix, 0.0_dp)
            CALL cp_dbcsr_plus_fm_fm_t(sparse_matrix=density_matrix_a(ispin)%matrix, &
                                       matrix_v=mo_coeff, matrix_g=psi_a_iB, &
                                       ncol=nmo, alpha=maxocc)
         END ASSOCIATE
         !
         ! Build the second density matrix
         CALL dbcsr_set(density_matrix_iii(ispin)%matrix, 0.0_dp)
         CALL cp_dbcsr_plus_fm_fm_t(sparse_matrix=density_matrix_iii(ispin)%matrix, &
                                    matrix_v=mo_coeff, matrix_g=psi1_p(ispin, iiiB), &
                                    ncol=nmo, alpha=maxocc)
         !
         ! Build the third density matrix
         CALL dbcsr_set(density_matrix_ii(ispin)%matrix, 0.0_dp)
         CALL cp_dbcsr_plus_fm_fm_t(sparse_matrix=density_matrix_ii(ispin)%matrix, &
                                    matrix_v=mo_coeff, matrix_g=psi1_p(ispin, iiB), &
                                    ncol=nmo, alpha=maxocc)
         DO idir = 1, 3
            !
            ! Calculate the current density on the pw grid (only soft if GAPW)
            ! idir is the cartesian component of the response current density
            ! generated by the magnetic field pointing in cartesian direction iB
            ! Use the qs_rho_type already  used for rho during the scf
            CALL qs_rho_get(current_env%jrho1_set(idir)%rho, rho_r=jrho1_r)
            CALL qs_rho_get(current_env%jrho1_set(idir)%rho, rho_g=jrho1_g)
            ASSOCIATE (jrho_rspace => jrho1_r(ispin), jrho_gspace => jrho1_g(ispin))
               CALL pw_zero(jrho_rspace)
               CALL pw_zero(jrho_gspace)
               CALL calculate_jrho_resp(density_matrix0(ispin)%matrix, &
                                        density_matrix_a(ispin)%matrix, &
                                        density_matrix_ii(ispin)%matrix, &
                                        density_matrix_iii(ispin)%matrix, &
                                        iB, idir, jrho_rspace, jrho_gspace, qs_env, &
                                        current_env, gapw)

               scale_fac = cell%deth/twopi
               CALL pw_scale(jrho_rspace, scale_fac)
               CALL pw_scale(jrho_gspace, scale_fac)

               jrho_tot_G(idir, iB) = pw_integrate_function(jrho_gspace, isign=-1)
               jrho_tot_R(idir, iB) = pw_integrate_function(jrho_rspace, isign=-1)
            END ASSOCIATE

            IF (output_unit > 0) THEN
               WRITE (output_unit, '(T2,2(A,E24.16))') 'Integrated j_'&
                    &//ACHAR(idir + 119)//ACHAR(iB + 119)//'(r): G-space=', &
                     jrho_tot_G(idir, iB), ' R-space=', jrho_tot_R(idir, iB)
            END IF
            !
         END DO ! idir
         !
         ! Deallocate buffer vectors
         DEALLOCATE (ddk)
         !
      END DO ! ispin

      IF (gapw) THEN
         DO idir = 1, 3
            !
            ! compute the atomic response current densities on the spherical grids
            ! First the sparse matrices are multiplied by the expansion coefficients
            ! this is the equivalent of the CPC for the charge density
            CALL calculate_jrho_atom_coeff(qs_env, current_env, &
                                           density_matrix0, &
                                           density_matrix_a, &
                                           density_matrix_ii, &
                                           density_matrix_iii, &
                                           iB, idir)
            !
            ! Then the radial parts are computed on the local radial grid, atom by atom
            ! 8 functions are computed for each atom, per grid point
            ! and per LM angular momentum. The multiplication by the Clebsh-Gordon
            ! coefficients or they correspondent for the derivatives, is also done here
            CALL calculate_jrho_atom_rad(qs_env, current_env, idir)
            !
            ! The current on the atomic grids
            CALL calculate_jrho_atom(current_env, qs_env, iB, idir)
         END DO ! idir
      END IF
      !
      ! Cube files
      IF (BTEST(cp_print_key_should_output(logger%iter_info, current_section,&
           &   "PRINT%CURRENT_CUBES"), cp_p_file)) THEN
         append_cube = section_get_lval(current_section, "PRINT%CURRENT_CUBES%APPEND")
         my_pos = "REWIND"
         IF (append_cube) THEN
            my_pos = "APPEND"
         END IF
         !
         CALL pw_env_get(pw_env, auxbas_rs_desc=auxbas_rs_desc, &
                         auxbas_pw_pool=auxbas_pw_pool)
         !
         CALL auxbas_pw_pool%create_pw(wf_r)
         !
         DO idir = 1, 3
            CALL pw_zero(wf_r)
            CALL qs_rho_get(current_env%jrho1_set(idir)%rho, rho_r=jrho1_r)
            DO ispin = 1, nspins
               CALL pw_axpy(jrho1_r(ispin), wf_r, 1.0_dp)
            END DO
            !
            IF (gapw) THEN
               ! Add the local hard and soft contributions
               ! This can be done atom by atom by a spline extrapolation of the  values
               ! on the spherical grid to the grid points.
               CPABORT("GAPW needs to be finalized")
            END IF
            filename = "jresp"
            mpi_io = .TRUE.
            WRITE (ext, '(a2,I1,a2,I1,a5)') "iB", iB, "_d", idir, ".cube"
            WRITE (ext, '(a2,a1,a2,a1,a5)') "iB", ACHAR(iB + 119), "_d", ACHAR(idir + 119), ".cube"
            unit_nr = cp_print_key_unit_nr(logger, current_section, "PRINT%CURRENT_CUBES", &
                                           extension=TRIM(ext), middle_name=TRIM(filename), &
                                           log_filename=.FALSE., file_position=my_pos, &
                                           mpi_io=mpi_io)

            CALL cp_pw_to_cube(wf_r, unit_nr, "RESPONSE CURRENT DENSITY ", &
                               particles=particles, &
                               stride=section_get_ivals(current_section, "PRINT%CURRENT_CUBES%STRIDE"), &
                               mpi_io=mpi_io)
            CALL cp_print_key_finished_output(unit_nr, logger, current_section,&
                 &                            "PRINT%CURRENT_CUBES", mpi_io=mpi_io)
         END DO
         !
         CALL auxbas_pw_pool%give_back_pw(wf_r)
      END IF ! current cube
      !
      ! Integrated current response checksum
      IF (output_unit > 0) THEN
         WRITE (output_unit, '(T2,A,E24.16)') 'CheckSum R-integrated j=', &
            SQRT(DDOT(9, jrho_tot_R(1, 1), 1, jrho_tot_R(1, 1), 1))
      END IF
      !
      !
      ! Dellocate grids for the calculation of jrho and the shift
      CALL cp_fm_release(psi1)
      CALL cp_fm_release(p_psi1)

      CALL dbcsr_deallocate_matrix_set(density_matrix0)
      CALL dbcsr_deallocate_matrix_set(density_matrix_a)
      CALL dbcsr_deallocate_matrix_set(density_matrix_ii)
      CALL dbcsr_deallocate_matrix_set(density_matrix_iii)
      !
      ! Finalize
      CALL timestop(handle)
      !
   END SUBROUTINE current_build_current

! **************************************************************************************************
!> \brief Calculation of the idir component of the response current density
!>       in the presence of a constant magnetic field in direction iB
!>       the current density is collocated on the pw grid in real space
!> \param mat_d0 ...
!> \param mat_jp ...
!> \param mat_jp_rii ...
!> \param mat_jp_riii ...
!> \param iB ...
!> \param idir ...
!> \param current_rs ...
!> \param current_gs ...
!> \param qs_env ...
!> \param current_env ...
!> \param soft_valid ...
!> \param retain_rsgrid ...
!> \note
!>       The collocate is done in three parts, one for each density matrix
!>       In all cases the density matrices and therefore the collocation
!>       are not symmetric, that means that all the pairs (ab and ba) have
!>       to be considered separately
!>
!>       mat_jp_{\mu\nu} is multiplied by
!>           f_{\mu\nu} = \phi_{\mu} (d\phi_{\nu}/dr)_{idir} -
!>                        (d\phi_{\mu}/dr)_{idir} \phi_{\nu}
!>
!>       mat_jp_rii_{\mu\nu} is multiplied by
!>           f_{\mu\nu} = \phi_{\mu} (r - R_{\nu})_{iiiB} (d\phi_{\nu}/dr)_{idir} -
!>                        (d\phi_{\mu}/dr)_{idir} (r - R_{\nu})_{iiiB} \phi_{\nu} +
!>                         \phi_{\mu} \phi_{\nu}  (last term only if iiiB=idir)
!>
!>       mat_jp_riii_{\mu\nu} is multiplied by
!>                             (be careful: change in sign with respect to previous)
!>           f_{\mu\nu} = -\phi_{\mu} (r - R_{\nu})_{iiB} (d\phi_{\nu}/dr)_{idir} +
!>                        (d\phi_{\mu}/dr)_{idir} (r - R_{\nu})_{iiB} \phi_{\nu} -
!>                         \phi_{\mu} \phi_{\nu}  (last term only if iiB=idir)
!>
!>       All the terms sum up to the same grid
! **************************************************************************************************
   SUBROUTINE calculate_jrho_resp(mat_d0, mat_jp, mat_jp_rii, mat_jp_riii, iB, idir, &
                                  current_rs, current_gs, qs_env, current_env, soft_valid, retain_rsgrid)

      TYPE(dbcsr_type), POINTER                          :: mat_d0, mat_jp, mat_jp_rii, mat_jp_riii
      INTEGER, INTENT(IN)                                :: iB, idir
      TYPE(pw_r3d_rs_type), INTENT(INOUT)                :: current_rs
      TYPE(pw_c1d_gs_type), INTENT(INOUT)                :: current_gs
      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(current_env_type)                             :: current_env
      LOGICAL, INTENT(IN), OPTIONAL                      :: soft_valid, retain_rsgrid

      CHARACTER(LEN=*), PARAMETER :: routineN = 'calculate_jrho_resp'
      INTEGER, PARAMETER                                 :: max_tasks = 2000

      CHARACTER(LEN=default_string_length)               :: basis_type
      INTEGER :: adbmdab_func, bcol, brow, cindex, curr_tasks, handle, i, iatom, iatom_old, idir2, &
         igrid_level, iiB, iiiB, ikind, ikind_old, ipgf, iset, iset_old, itask, ithread, jatom, &
         jatom_old, jkind, jkind_old, jpgf, jset, jset_old, maxco, maxpgf, maxset, maxsgf, &
         maxsgf_set, na1, na2, natom, nb1, nb2, ncoa, ncob, nimages, nkind, nseta, nsetb, ntasks, &
         nthread, sgfa, sgfb
      INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, npgfa, &
                                                            npgfb, nsgfa, nsgfb
      INTEGER, DIMENSION(:, :), POINTER                  :: first_sgfa, first_sgfb
      LOGICAL                                            :: atom_pair_changed, den_found, &
                                                            den_found_a, distributed_rs_grids, &
                                                            do_igaim, my_retain_rsgrid, my_soft
      REAL(dp), DIMENSION(:, :, :), POINTER              :: my_current, my_gauge, my_rho
      REAL(KIND=dp)                                      :: eps_rho_rspace, f, kind_radius_a, &
                                                            kind_radius_b, Lxo2, Lyo2, Lzo2, &
                                                            prefactor, radius, scale, scale2, zetp
      REAL(KIND=dp), DIMENSION(3)                        :: ra, rab, rb, rp
      REAL(KIND=dp), DIMENSION(:), POINTER               :: set_radius_a, set_radius_b
      REAL(KIND=dp), DIMENSION(:, :), POINTER :: jp_block_a, jp_block_b, jp_block_c, jp_block_d, &
         jpab_a, jpab_b, jpab_c, jpab_d, rpgfa, rpgfb, sphi_a, sphi_b, work, zeta, zetb
      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: jpabt_a, jpabt_b, jpabt_c, jpabt_d, workt
      TYPE(atom_pair_type), DIMENSION(:), POINTER        :: atom_pair_recv, atom_pair_send
      TYPE(cell_type), POINTER                           :: cell
      TYPE(cube_info_type), DIMENSION(:), POINTER        :: cube_info
      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: deltajp_a, deltajp_b, deltajp_c, &
                                                            deltajp_d
      TYPE(dbcsr_type), POINTER                          :: mat_a, mat_b, mat_c, mat_d
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(gridlevel_info_type), POINTER                 :: gridlevel_info
      TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list
      TYPE(gto_basis_set_type), POINTER                  :: basis_set_a, basis_set_b, orb_basis_set
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(neighbor_list_iterator_p_type), &
         DIMENSION(:), POINTER                           :: nl_iterator
      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
         POINTER                                         :: sab_orb
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(pw_env_type), POINTER                         :: pw_env
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
      TYPE(qs_kind_type), POINTER                        :: qs_kind
      TYPE(realspace_grid_desc_p_type), DIMENSION(:), &
         POINTER                                         :: rs_descs
      TYPE(realspace_grid_type), DIMENSION(:), POINTER   :: rs_current, rs_rho
      TYPE(realspace_grid_type), DIMENSION(:, :), &
         POINTER                                         :: rs_gauge
      TYPE(task_type), DIMENSION(:), POINTER             :: tasks

      NULLIFY (qs_kind, cell, dft_control, orb_basis_set, rs_rho, &
               qs_kind_set, sab_orb, particle_set, rs_current, pw_env, &
               rs_descs, para_env, set_radius_a, set_radius_b, la_max, la_min, &
               lb_max, lb_min, npgfa, npgfb, nsgfa, nsgfb, rpgfa, rpgfb, &
               sphi_a, sphi_b, zeta, zetb, first_sgfa, first_sgfb, tasks, &
               workt, mat_a, mat_b, mat_c, mat_d, rs_gauge)
      NULLIFY (deltajp_a, deltajp_b, deltajp_c, deltajp_d)
      NULLIFY (jp_block_a, jp_block_b, jp_block_c, jp_block_d)
      NULLIFY (jpabt_a, jpabt_b, jpabt_c, jpabt_d)
      NULLIFY (atom_pair_send, atom_pair_recv)

      CALL timeset(routineN, handle)

      !
      ! Set pointers for the different gauge
      ! If do_igaim is False the current_env is never needed
      do_igaim = current_env%gauge == current_gauge_atom

      mat_a => mat_jp
      mat_b => mat_jp_rii
      mat_c => mat_jp_riii
      IF (do_igaim) mat_d => mat_d0

      my_retain_rsgrid = .FALSE.
      IF (PRESENT(retain_rsgrid)) my_retain_rsgrid = retain_rsgrid

      CALL get_qs_env(qs_env=qs_env, &
                      qs_kind_set=qs_kind_set, &
                      cell=cell, &
                      dft_control=dft_control, &
                      particle_set=particle_set, &
                      sab_all=sab_orb, &
                      para_env=para_env, &
                      pw_env=pw_env)

      IF (do_igaim) CALL get_current_env(current_env=current_env, rs_gauge=rs_gauge)

      ! Component of appearing in the vector product rxp, iiB and iiiB
      CALL set_vecp(iB, iiB, iiiB)
      !
      !
      scale2 = 0.0_dp
      idir2 = 1
      IF (idir /= iB) THEN
         CALL set_vecp_rev(idir, iB, idir2)
         scale2 = fac_vecp(idir, iB, idir2)
      END IF
      !
      ! *** assign from pw_env
      gridlevel_info => pw_env%gridlevel_info
      cube_info => pw_env%cube_info

      !   Check that the neighbor list with all the pairs is associated
      CPASSERT(ASSOCIATED(sab_orb))
      ! *** set up the pw multi-grids
      CPASSERT(ASSOCIATED(pw_env))
      CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_rho)

      distributed_rs_grids = .FALSE.
      DO igrid_level = 1, gridlevel_info%ngrid_levels
         IF (.NOT. ALL(rs_descs(igrid_level)%rs_desc%perd == 1)) THEN
            distributed_rs_grids = .TRUE.
         END IF
      END DO
      eps_rho_rspace = dft_control%qs_control%eps_rho_rspace
      nthread = 1

      !   *** Allocate work storage ***
      CALL get_qs_kind_set(qs_kind_set=qs_kind_set, &
                           maxco=maxco, &
                           maxsgf=maxsgf, &
                           maxsgf_set=maxsgf_set)

      Lxo2 = SQRT(SUM(cell%hmat(:, 1)**2))/2.0_dp
      Lyo2 = SQRT(SUM(cell%hmat(:, 2)**2))/2.0_dp
      Lzo2 = SQRT(SUM(cell%hmat(:, 3)**2))/2.0_dp

      my_soft = .FALSE.
      IF (PRESENT(soft_valid)) my_soft = soft_valid
      IF (my_soft) THEN
         basis_type = "ORB_SOFT"
      ELSE
         basis_type = "ORB"
      END IF

      nkind = SIZE(qs_kind_set)

      CALL reallocate(jpabt_a, 1, maxco, 1, maxco, 0, nthread - 1)
      CALL reallocate(jpabt_b, 1, maxco, 1, maxco, 0, nthread - 1)
      CALL reallocate(jpabt_c, 1, maxco, 1, maxco, 0, nthread - 1)
      CALL reallocate(jpabt_d, 1, maxco, 1, maxco, 0, nthread - 1)
      CALL reallocate(workt, 1, maxco, 1, maxsgf_set, 0, nthread - 1)
      CALL reallocate_tasks(tasks, max_tasks)

      ntasks = 0
      curr_tasks = SIZE(tasks)

      !   get maximum numbers
      natom = SIZE(particle_set)
      maxset = 0
      maxpgf = 0

      ! hard code matrix index (no kpoints)
      nimages = dft_control%nimages
      CPASSERT(nimages == 1)
      cindex = 1

      DO ikind = 1, nkind
         qs_kind => qs_kind_set(ikind)

         CALL get_qs_kind(qs_kind=qs_kind, basis_set=orb_basis_set)

         IF (.NOT. ASSOCIATED(orb_basis_set)) CYCLE

         CALL get_gto_basis_set(gto_basis_set=orb_basis_set, npgf=npgfa, nset=nseta)
         maxset = MAX(nseta, maxset)
         maxpgf = MAX(MAXVAL(npgfa), maxpgf)
      END DO

      !   *** Initialize working density matrix ***

      ! distributed rs grids require a matrix that will be changed (distribute_tasks)
      ! whereas this is not the case for replicated grids
      ALLOCATE (deltajp_a(1), deltajp_b(1), deltajp_c(1), deltajp_d(1))
      IF (distributed_rs_grids) THEN
         ALLOCATE (deltajp_a(1)%matrix, deltajp_b(1)%matrix, deltajp_c(1)%matrix)
         IF (do_igaim) THEN
            ALLOCATE (deltajp_d(1)%matrix)
         END IF

         CALL dbcsr_create(deltajp_a(1)%matrix, template=mat_a, name='deltajp_a')
         CALL dbcsr_create(deltajp_b(1)%matrix, template=mat_a, name='deltajp_b')
         CALL dbcsr_create(deltajp_c(1)%matrix, template=mat_a, name='deltajp_c')
         IF (do_igaim) CALL dbcsr_create(deltajp_d(1)%matrix, template=mat_a, name='deltajp_d')
      ELSE
         deltajp_a(1)%matrix => mat_a !mat_jp
         deltajp_b(1)%matrix => mat_b !mat_jp_rii
         deltajp_c(1)%matrix => mat_c !mat_jp_riii
         IF (do_igaim) deltajp_d(1)%matrix => mat_d !mat_d0
      END IF

      ALLOCATE (basis_set_list(nkind))
      DO ikind = 1, nkind
         qs_kind => qs_kind_set(ikind)
         CALL get_qs_kind(qs_kind=qs_kind, basis_set=basis_set_a, basis_type=basis_type)
         IF (ASSOCIATED(basis_set_a)) THEN
            basis_set_list(ikind)%gto_basis_set => basis_set_a
         ELSE
            NULLIFY (basis_set_list(ikind)%gto_basis_set)
         END IF
      END DO
      CALL neighbor_list_iterator_create(nl_iterator, sab_orb)
      DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
         CALL get_iterator_info(nl_iterator, ikind=ikind, jkind=jkind, iatom=iatom, jatom=jatom, r=rab)
         basis_set_a => basis_set_list(ikind)%gto_basis_set
         IF (.NOT. ASSOCIATED(basis_set_a)) CYCLE
         basis_set_b => basis_set_list(jkind)%gto_basis_set
         IF (.NOT. ASSOCIATED(basis_set_b)) CYCLE
         ra(:) = pbc(particle_set(iatom)%r, cell)
         ! basis ikind
         first_sgfa => basis_set_a%first_sgf
         la_max => basis_set_a%lmax
         la_min => basis_set_a%lmin
         npgfa => basis_set_a%npgf
         nseta = basis_set_a%nset
         nsgfa => basis_set_a%nsgf_set
         rpgfa => basis_set_a%pgf_radius
         set_radius_a => basis_set_a%set_radius
         kind_radius_a = basis_set_a%kind_radius
         sphi_a => basis_set_a%sphi
         zeta => basis_set_a%zet
         ! basis jkind
         first_sgfb => basis_set_b%first_sgf
         lb_max => basis_set_b%lmax
         lb_min => basis_set_b%lmin
         npgfb => basis_set_b%npgf
         nsetb = basis_set_b%nset
         nsgfb => basis_set_b%nsgf_set
         rpgfb => basis_set_b%pgf_radius
         set_radius_b => basis_set_b%set_radius
         kind_radius_b = basis_set_b%kind_radius
         sphi_b => basis_set_b%sphi
         zetb => basis_set_b%zet

         IF (ABS(rab(1)) > Lxo2 .OR. ABS(rab(2)) > Lyo2 .OR. ABS(rab(3)) > Lzo2) THEN
            CYCLE
         END IF

         brow = iatom
         bcol = jatom

         CALL dbcsr_get_block_p(matrix=mat_a, row=brow, col=bcol, &
                                block=jp_block_a, found=den_found_a)
         CALL dbcsr_get_block_p(matrix=mat_b, row=brow, col=bcol, &
                                block=jp_block_b, found=den_found)
         CALL dbcsr_get_block_p(matrix=mat_c, row=brow, col=bcol, &
                                block=jp_block_c, found=den_found)
         IF (do_igaim) CALL dbcsr_get_block_p(matrix=mat_d, row=brow, col=bcol, &
                                              block=jp_block_d, found=den_found)

         IF (.NOT. ASSOCIATED(jp_block_a)) CYCLE

         IF (distributed_rs_grids) THEN
            CALL dbcsr_put_block(deltajp_a(1)%matrix, brow, bcol, jp_block_a)
            CALL dbcsr_put_block(deltajp_b(1)%matrix, brow, bcol, jp_block_b)
            CALL dbcsr_put_block(deltajp_c(1)%matrix, brow, bcol, jp_block_c)
            IF (do_igaim) THEN
               CALL dbcsr_put_block(deltajp_d(1)%matrix, brow, bcol, jp_block_d)
            END IF
         END IF

         CALL task_list_inner_loop(tasks, ntasks, curr_tasks, rs_descs, &
                                   dft_control, cube_info, gridlevel_info, cindex, &
                                   iatom, jatom, rpgfa, rpgfb, zeta, zetb, kind_radius_b, &
                                   set_radius_a, set_radius_b, ra, rab, &
                                   la_max, la_min, lb_max, lb_min, npgfa, npgfb, nseta, nsetb)

      END DO
      CALL neighbor_list_iterator_release(nl_iterator)

      DEALLOCATE (basis_set_list)

      IF (distributed_rs_grids) THEN
         CALL dbcsr_finalize(deltajp_a(1)%matrix)
         CALL dbcsr_finalize(deltajp_b(1)%matrix)
         CALL dbcsr_finalize(deltajp_c(1)%matrix)
         IF (do_igaim) CALL dbcsr_finalize(deltajp_d(1)%matrix)
      END IF

      ! sorts / redistributes the task list
      CALL distribute_tasks(rs_descs=rs_descs, ntasks=ntasks, natoms=natom, tasks=tasks, &
                            atom_pair_send=atom_pair_send, atom_pair_recv=atom_pair_recv, &
                            symmetric=.FALSE., reorder_rs_grid_ranks=.TRUE., &
                            skip_load_balance_distributed=.FALSE.)

      ALLOCATE (rs_current(gridlevel_info%ngrid_levels))

      DO igrid_level = 1, gridlevel_info%ngrid_levels
         ! Here we need to reallocate the distributed rs_grids, which may have been reordered
         ! by distribute_tasks
         IF (rs_descs(igrid_level)%rs_desc%distributed .AND. .NOT. my_retain_rsgrid) THEN
            CALL rs_grid_release(rs_rho(igrid_level))
            CALL rs_grid_create(rs_rho(igrid_level), rs_descs(igrid_level)%rs_desc)
         END IF
         CALL rs_grid_zero(rs_rho(igrid_level))
         CALL rs_grid_create(rs_current(igrid_level), rs_descs(igrid_level)%rs_desc)
         CALL rs_grid_zero(rs_current(igrid_level))
      END DO

      !
      ! we need to build the gauge here
      IF (.NOT. current_env%gauge_init .AND. do_igaim) THEN
         CALL current_set_gauge(current_env, qs_env)
         current_env%gauge_init = .TRUE.
      END IF
      !
      ! for any case double check the bounds !
      IF (do_igaim) THEN
         DO igrid_level = 1, gridlevel_info%ngrid_levels
            my_rho => rs_rho(igrid_level)%r
            my_current => rs_current(igrid_level)%r
            IF (LBOUND(my_rho, 3) /= LBOUND(my_current, 3) .OR. &
                LBOUND(my_rho, 2) /= LBOUND(my_current, 2) .OR. &
                LBOUND(my_rho, 1) /= LBOUND(my_current, 1) .OR. &
                UBOUND(my_rho, 3) /= UBOUND(my_current, 3) .OR. &
                UBOUND(my_rho, 2) /= UBOUND(my_current, 2) .OR. &
                UBOUND(my_rho, 1) /= UBOUND(my_current, 1)) THEN
               WRITE (*, *) 'LBOUND(my_rho,3),LBOUND(my_current,3)', LBOUND(my_rho, 3), LBOUND(my_current, 3)
               WRITE (*, *) 'LBOUND(my_rho,2),LBOUND(my_current,2)', LBOUND(my_rho, 2), LBOUND(my_current, 2)
               WRITE (*, *) 'LBOUND(my_rho,1),LBOUND(my_current,1)', LBOUND(my_rho, 1), LBOUND(my_current, 1)
               WRITE (*, *) 'UBOUND(my_rho,3),UBOUND(my_current,3)', UBOUND(my_rho, 3), UBOUND(my_current, 3)
               WRITE (*, *) 'UBOUND(my_rho,2),UBOUND(my_current,2)', UBOUND(my_rho, 2), UBOUND(my_current, 2)
               WRITE (*, *) 'UBOUND(my_rho,1),UBOUND(my_current,1)', UBOUND(my_rho, 1), UBOUND(my_current, 1)
               CPABORT("Bug")
            END IF

            my_gauge => rs_gauge(1, igrid_level)%r
            IF (LBOUND(my_rho, 3) /= LBOUND(my_gauge, 3) .OR. &
                LBOUND(my_rho, 2) /= LBOUND(my_gauge, 2) .OR. &
                LBOUND(my_rho, 1) /= LBOUND(my_gauge, 1) .OR. &
                UBOUND(my_rho, 3) /= UBOUND(my_gauge, 3) .OR. &
                UBOUND(my_rho, 2) /= UBOUND(my_gauge, 2) .OR. &
                UBOUND(my_rho, 1) /= UBOUND(my_gauge, 1)) THEN
               WRITE (*, *) 'LBOUND(my_rho,3),LBOUND(my_gauge,3)', LBOUND(my_rho, 3), LBOUND(my_gauge, 3)
               WRITE (*, *) 'LBOUND(my_rho,2),LBOUND(my_gauge,2)', LBOUND(my_rho, 2), LBOUND(my_gauge, 2)
               WRITE (*, *) 'LBOUND(my_rho,1),LBOUND(my_gauge,1)', LBOUND(my_rho, 1), LBOUND(my_gauge, 1)
               WRITE (*, *) 'UBOUND(my_rho,3),UbOUND(my_gauge,3)', UBOUND(my_rho, 3), UBOUND(my_gauge, 3)
               WRITE (*, *) 'UBOUND(my_rho,2),UBOUND(my_gauge,2)', UBOUND(my_rho, 2), UBOUND(my_gauge, 2)
               WRITE (*, *) 'UBOUND(my_rho,1),UBOUND(my_gauge,1)', UBOUND(my_rho, 1), UBOUND(my_gauge, 1)
               CPABORT("Bug")
            END IF
         END DO
      END IF
      !
      !-------------------------------------------------------------

      IF (distributed_rs_grids) THEN
         CALL rs_distribute_matrix(rs_descs=rs_descs, pmats=deltajp_a, &
                                   atom_pair_send=atom_pair_send, atom_pair_recv=atom_pair_recv, &
                                   nimages=nimages, scatter=.TRUE.)
         CALL rs_distribute_matrix(rs_descs=rs_descs, pmats=deltajp_b, &
                                   atom_pair_send=atom_pair_send, atom_pair_recv=atom_pair_recv, &
                                   nimages=nimages, scatter=.TRUE.)
         CALL rs_distribute_matrix(rs_descs=rs_descs, pmats=deltajp_c, &
                                   atom_pair_send=atom_pair_send, atom_pair_recv=atom_pair_recv, &
                                   nimages=nimages, scatter=.TRUE.)
         IF (do_igaim) CALL rs_distribute_matrix(rs_descs=rs_descs, pmats=deltajp_d, &
                                                 atom_pair_send=atom_pair_send, atom_pair_recv=atom_pair_recv, &
                                                 nimages=nimages, scatter=.TRUE.)
      END IF

      ithread = 0
      jpab_a => jpabt_a(:, :, ithread)
      jpab_b => jpabt_b(:, :, ithread)
      jpab_c => jpabt_c(:, :, ithread)
      IF (do_igaim) jpab_d => jpabt_d(:, :, ithread)
      work => workt(:, :, ithread)

      iatom_old = -1; jatom_old = -1; iset_old = -1; jset_old = -1
      ikind_old = -1; jkind_old = -1

      loop_tasks: DO itask = 1, ntasks
         igrid_level = tasks(itask)%grid_level
         cindex = tasks(itask)%image
         iatom = tasks(itask)%iatom
         jatom = tasks(itask)%jatom
         iset = tasks(itask)%iset
         jset = tasks(itask)%jset
         ipgf = tasks(itask)%ipgf
         jpgf = tasks(itask)%jpgf

         ! apparently generalised collocation not implemented correctly yet
         CPASSERT(tasks(itask)%dist_type /= 2)

         IF (iatom /= iatom_old .OR. jatom /= jatom_old) THEN

            ikind = particle_set(iatom)%atomic_kind%kind_number
            jkind = particle_set(jatom)%atomic_kind%kind_number

            IF (iatom /= iatom_old) ra(:) = pbc(particle_set(iatom)%r, cell)

            brow = iatom
            bcol = jatom

            IF (ikind /= ikind_old) THEN
               CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set, &
                                basis_type=basis_type)

               CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
                                      first_sgf=first_sgfa, &
                                      lmax=la_max, &
                                      lmin=la_min, &
                                      npgf=npgfa, &
                                      nset=nseta, &
                                      nsgf_set=nsgfa, &
                                      pgf_radius=rpgfa, &
                                      set_radius=set_radius_a, &
                                      sphi=sphi_a, &
                                      zet=zeta)
            END IF

            IF (jkind /= jkind_old) THEN

               CALL get_qs_kind(qs_kind_set(jkind), &
                                basis_set=orb_basis_set, basis_type=basis_type)

               CALL get_gto_basis_set(gto_basis_set=orb_basis_set, &
                                      first_sgf=first_sgfb, &
                                      kind_radius=kind_radius_b, &
                                      lmax=lb_max, &
                                      lmin=lb_min, &
                                      npgf=npgfb, &
                                      nset=nsetb, &
                                      nsgf_set=nsgfb, &
                                      pgf_radius=rpgfb, &
                                      set_radius=set_radius_b, &
                                      sphi=sphi_b, &
                                      zet=zetb)

            END IF

            CALL dbcsr_get_block_p(matrix=deltajp_a(1)%matrix, row=brow, col=bcol, &
                                   block=jp_block_a, found=den_found)
            CALL dbcsr_get_block_p(matrix=deltajp_b(1)%matrix, row=brow, col=bcol, &
                                   block=jp_block_b, found=den_found)
            CALL dbcsr_get_block_p(matrix=deltajp_c(1)%matrix, row=brow, col=bcol, &
                                   block=jp_block_c, found=den_found)
            IF (do_igaim) CALL dbcsr_get_block_p(matrix=deltajp_d(1)%matrix, row=brow, col=bcol, &
                                                 block=jp_block_d, found=den_found)

            IF (.NOT. ASSOCIATED(jp_block_a)) &
               CPABORT("p_block not associated in deltap")

            iatom_old = iatom
            jatom_old = jatom
            ikind_old = ikind
            jkind_old = jkind

            atom_pair_changed = .TRUE.

         ELSE

            atom_pair_changed = .FALSE.

         END IF

         IF (atom_pair_changed .OR. iset_old /= iset .OR. jset_old /= jset) THEN

            ncoa = npgfa(iset)*ncoset(la_max(iset))
            sgfa = first_sgfa(1, iset)
            ncob = npgfb(jset)*ncoset(lb_max(jset))
            sgfb = first_sgfb(1, jset)
            ! Decontraction step for the selected blocks of the 3 density matrices

            CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
                       1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                       jp_block_a(sgfa, sgfb), SIZE(jp_block_a, 1), &
                       0.0_dp, work(1, 1), maxco)
            CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
                       1.0_dp, work(1, 1), maxco, &
                       sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                       0.0_dp, jpab_a(1, 1), maxco)

            CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
                       1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                       jp_block_b(sgfa, sgfb), SIZE(jp_block_b, 1), &
                       0.0_dp, work(1, 1), maxco)
            CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
                       1.0_dp, work(1, 1), maxco, &
                       sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                       0.0_dp, jpab_b(1, 1), maxco)

            CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
                       1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                       jp_block_c(sgfa, sgfb), SIZE(jp_block_c, 1), &
                       0.0_dp, work(1, 1), maxco)
            CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
                       1.0_dp, work(1, 1), maxco, &
                       sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                       0.0_dp, jpab_c(1, 1), maxco)

            IF (do_igaim) THEN
               CALL dgemm("N", "N", ncoa, nsgfb(jset), nsgfa(iset), &
                          1.0_dp, sphi_a(1, sgfa), SIZE(sphi_a, 1), &
                          jp_block_d(sgfa, sgfb), SIZE(jp_block_d, 1), &
                          0.0_dp, work(1, 1), maxco)
               CALL dgemm("N", "T", ncoa, ncob, nsgfb(jset), &
                          1.0_dp, work(1, 1), maxco, &
                          sphi_b(1, sgfb), SIZE(sphi_b, 1), &
                          0.0_dp, jpab_d(1, 1), maxco)
            END IF

            iset_old = iset
            jset_old = jset

         END IF

         SELECT CASE (idir)
         CASE (1)
            adbmdab_func = GRID_FUNC_ADBmDAB_X
         CASE (2)
            adbmdab_func = GRID_FUNC_ADBmDAB_Y
         CASE (3)
            adbmdab_func = GRID_FUNC_ADBmDAB_Z
         CASE DEFAULT
            CPABORT("invalid idir")
         END SELECT

         rab(:) = tasks(itask)%rab
         rb(:) = ra(:) + rab(:)
         zetp = zeta(ipgf, iset) + zetb(jpgf, jset)
         f = zetb(jpgf, jset)/zetp
         prefactor = EXP(-zeta(ipgf, iset)*f*DOT_PRODUCT(rab, rab))
         rp(:) = ra(:) + f*rab(:)

         na1 = (ipgf - 1)*ncoset(la_max(iset)) + 1
         na2 = ipgf*ncoset(la_max(iset))
         nb1 = (jpgf - 1)*ncoset(lb_max(jset)) + 1
         nb2 = jpgf*ncoset(lb_max(jset))

         ! Four calls to the general collocate density, to multply the correct function
         ! to each density matrix

         !
         ! here the decontracted mat_jp_{ab} is multiplied by
         !     f_{ab} = g_{a} (dg_{b}/dr)_{idir} - (dg_{a}/dr)_{idir} g_{b}
         scale = 1.0_dp
         radius = exp_radius_very_extended(la_min=la_min(iset), la_max=la_max(iset), &
                                           lb_min=lb_min(jset), lb_max=lb_max(jset), &
                                           ra=ra, rb=rb, rp=rp, zetp=zetp, eps=eps_rho_rspace, &
                                           prefactor=prefactor, cutoff=1.0_dp)

         CALL collocate_pgf_product(la_max(iset), zeta(ipgf, iset), &
                                    la_min(iset), lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                                    ra, rab, scale, jpab_a, na1 - 1, nb1 - 1, &
                                    rs_current(igrid_level), &
                                    radius=radius, ga_gb_function=adbmdab_func)
         IF (do_igaim) THEN
            ! here the decontracted mat_jb_{ab} is multiplied by
            !     f_{ab} = g_{a} * g_{b} ! THIS GOES OUTSIDE THE LOOP !
            IF (scale2 /= 0.0_dp) THEN
               CALL collocate_pgf_product(la_max(iset), zeta(ipgf, iset), &
                                          la_min(iset), lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                                          ra, rab, scale2, jpab_d, na1 - 1, nb1 - 1, &
                                          rs_rho(igrid_level), &
                                          radius=radius, ga_gb_function=GRID_FUNC_AB)
            END IF !rm
            ! here the decontracted mat_jp_rii{ab} is multiplied by
            !     f_{ab} = g_{a} (d(r) - R_{b})_{iiB} (dg_{b}/dr)_{idir} -
            !             (dg_{a}/dr)_{idir} (d(r) - R_{b})_{iiB} g_{b}
            scale = 1.0_dp
            current_env%rs_buf(igrid_level)%r(:, :, :) = 0.0_dp
            CALL collocate_pgf_product(la_max(iset), zeta(ipgf, iset), &
                                       la_min(iset), lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                                       ra, rab, scale, jpab_b, na1 - 1, nb1 - 1, &
                                       radius=radius, &
                                       ga_gb_function=adbmdab_func, &
                                       rsgrid=current_env%rs_buf(igrid_level))
            CALL collocate_gauge_ortho(rsgrid=current_env%rs_buf(igrid_level), &
                                       rsbuf=rs_current(igrid_level), &
                                       rsgauge=rs_gauge(iiiB, igrid_level), &
                                       cube_info=cube_info(igrid_level), radius=radius, &
                                       zeta=zeta(ipgf, iset), zetb=zetb(jpgf, jset), &
                                       ra=ra, rab=rab, ir=iiiB)

            ! here the decontracted mat_jp_riii{ab} is multiplied by
            !     f_{ab} = -g_{a} (d(r) - R_{b})_{iiB} (dg_{b}/dr)_{idir} +
            !             (dg_{a}/dr)_{idir} (d(r) - R_{b})_{iiB} g_{b}
            scale = -1.0_dp
            current_env%rs_buf(igrid_level)%r(:, :, :) = 0.0_dp
            CALL collocate_pgf_product(la_max(iset), zeta(ipgf, iset), &
                                       la_min(iset), lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                                       ra, rab, scale, jpab_c, na1 - 1, nb1 - 1, &
                                       radius=radius, &
                                       ga_gb_function=adbmdab_func, &
                                       rsgrid=current_env%rs_buf(igrid_level))
            CALL collocate_gauge_ortho(rsgrid=current_env%rs_buf(igrid_level), &
                                       rsbuf=rs_current(igrid_level), &
                                       rsgauge=rs_gauge(iiB, igrid_level), &
                                       cube_info=cube_info(igrid_level), radius=radius, &
                                       zeta=zeta(ipgf, iset), zetb=zetb(jpgf, jset), &
                                       ra=ra, rab=rab, ir=iiB)
         ELSE
            ! here the decontracted mat_jp_rii{ab} is multiplied by
            !     f_{ab} = g_{a} (r - R_{b})_{iiB} (dg_{b}/dr)_{idir} -
            !             (dg_{a}/dr)_{idir} (r - R_{b})_{iiB} g_{b}
            scale = 1.0_dp
            CALL collocate_pgf_product(la_max(iset), zeta(ipgf, iset), &
                                       la_min(iset), lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                                       ra, rab, scale, jpab_b, na1 - 1, nb1 - 1, &
                                       rs_current(igrid_level), &
                                       radius=radius, &
                                       ga_gb_function=encode_ardbmdarb_func(idir=idir, ir=iiiB))
            ! here the decontracted mat_jp_riii{ab} is multiplied by
            !     f_{ab} = -g_{a} (r - R_{b})_{iiB} (dg_{b}/dr)_{idir} +
            !             (dg_{a}/dr)_{idir} (r - R_{b})_{iiB} g_{b}
            scale = -1.0_dp
            CALL collocate_pgf_product(la_max(iset), zeta(ipgf, iset), &
                                       la_min(iset), lb_max(jset), zetb(jpgf, jset), lb_min(jset), &
                                       ra, rab, scale, jpab_c, na1 - 1, nb1 - 1, &
                                       rs_current(igrid_level), &
                                       radius=radius, &
                                       ga_gb_function=encode_ardbmdarb_func(idir=idir, ir=iiB))
         END IF

      END DO loop_tasks
      !
      ! Scale the density with the gauge rho * ( r - d(r) ) if needed
      IF (do_igaim) THEN
         DO igrid_level = 1, gridlevel_info%ngrid_levels
            CALL rs_grid_mult_and_add(rs_current(igrid_level), rs_rho(igrid_level), &
                                      rs_gauge(idir2, igrid_level), 1.0_dp)
         END DO
      END IF
      !   *** Release work storage ***

      IF (distributed_rs_grids) THEN
         CALL dbcsr_deallocate_matrix(deltajp_a(1)%matrix)
         CALL dbcsr_deallocate_matrix(deltajp_b(1)%matrix)
         CALL dbcsr_deallocate_matrix(deltajp_c(1)%matrix)
         IF (do_igaim) CALL dbcsr_deallocate_matrix(deltajp_d(1)%matrix)
      END IF
      DEALLOCATE (deltajp_a, deltajp_b, deltajp_c, deltajp_d)

      DEALLOCATE (jpabt_a, jpabt_b, jpabt_c, jpabt_d, workt, tasks)

      IF (ASSOCIATED(atom_pair_send)) DEALLOCATE (atom_pair_send)
      IF (ASSOCIATED(atom_pair_recv)) DEALLOCATE (atom_pair_recv)

      CALL density_rs2pw(pw_env, rs_current, current_rs, current_gs)
      DO i = 1, SIZE(rs_current)
         CALL rs_grid_release(rs_current(i))
      END DO

      DO i = 1, SIZE(rs_rho)
         IF (rs_descs(i)%rs_desc%distributed .AND. .NOT. my_retain_rsgrid) THEN
            CALL rs_grid_release(rs_rho(i))
         END IF
      END DO

      ! Free the array of grids (grids themselves are released in density_rs2pw)
      DEALLOCATE (rs_current)

      CALL timestop(handle)

   END SUBROUTINE calculate_jrho_resp

! **************************************************************************************************
!> \brief ...
!> \param idir ...
!> \param ir ...
!> \return ...
! **************************************************************************************************
   FUNCTION encode_ardbmdarb_func(idir, ir) RESULT(func)
      INTEGER, INTENT(IN)                                :: idir, ir
      INTEGER                                            :: func

      CPASSERT(1 <= idir .AND. idir <= 3 .AND. 1 <= ir .AND. ir <= 3)
      SELECT CASE (10*idir + ir)
      CASE (11)
         func = GRID_FUNC_ARDBmDARB_XX
      CASE (12)
         func = GRID_FUNC_ARDBmDARB_XY
      CASE (13)
         func = GRID_FUNC_ARDBmDARB_XZ
      CASE (21)
         func = GRID_FUNC_ARDBmDARB_YX
      CASE (22)
         func = GRID_FUNC_ARDBmDARB_YY
      CASE (23)
         func = GRID_FUNC_ARDBmDARB_YZ
      CASE (31)
         func = GRID_FUNC_ARDBmDARB_ZX
      CASE (32)
         func = GRID_FUNC_ARDBmDARB_ZY
      CASE (33)
         func = GRID_FUNC_ARDBmDARB_ZZ
      CASE DEFAULT
         CPABORT("invalid idir or iiiB")
      END SELECT
   END FUNCTION encode_ardbmdarb_func

! **************************************************************************************************
!> \brief ...
!> \param rsgrid ...
!> \param rsbuf ...
!> \param rsgauge ...
!> \param cube_info ...
!> \param radius ...
!> \param ra ...
!> \param rab ...
!> \param zeta ...
!> \param zetb ...
!> \param ir ...
! **************************************************************************************************
   SUBROUTINE collocate_gauge_ortho(rsgrid, rsbuf, rsgauge, cube_info, radius, ra, rab, zeta, zetb, ir)
      TYPE(realspace_grid_type)                          :: rsgrid, rsbuf, rsgauge
      TYPE(cube_info_type), INTENT(IN)                   :: cube_info
      REAL(KIND=dp), INTENT(IN)                          :: radius
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: ra, rab
      REAL(KIND=dp), INTENT(IN)                          :: zeta, zetb
      INTEGER, INTENT(IN)                                :: ir

      INTEGER                                            :: cmax, i, ig, igmax, igmin, j, j2, jg, &
                                                            jg2, jgmin, k, k2, kg, kg2, kgmin, &
                                                            length, offset, sci, start
      INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: map
      INTEGER, DIMENSION(3)                              :: cubecenter, lb_cube, ng, ub_cube
      INTEGER, DIMENSION(:), POINTER                     :: sphere_bounds
      REAL(KIND=dp)                                      :: f, point(3, 4), res(4), x, y, y2, z, z2, &
                                                            zetp
      REAL(KIND=dp), DIMENSION(3)                        :: dr, rap, rb, rbp, roffset, rp
      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: gauge, grid, grid_buf

      CPASSERT(rsgrid%desc%orthorhombic)
      NULLIFY (sphere_bounds)

      grid => rsgrid%r(:, :, :)
      grid_buf => rsbuf%r(:, :, :)
      gauge => rsgauge%r(:, :, :)

      ! *** center of gaussians and their product
      zetp = zeta + zetb
      f = zetb/zetp
      rap(:) = f*rab(:)
      rbp(:) = rap(:) - rab(:)
      rp(:) = ra(:) + rap(:)
      rb(:) = ra(:) + rab(:)

      !   *** properties of the grid ***
      ng(:) = rsgrid%desc%npts(:)
      dr(1) = rsgrid%desc%dh(1, 1)
      dr(2) = rsgrid%desc%dh(2, 2)
      dr(3) = rsgrid%desc%dh(3, 3)

      !   *** get the sub grid properties for the given radius ***
      CALL return_cube(cube_info, radius, lb_cube, ub_cube, sphere_bounds)
      cmax = MAXVAL(ub_cube)

      !   *** position of the gaussian product
      !
      !   this is the actual definition of the position on the grid
      !   i.e. a point rp(:) gets here grid coordinates
      !   MODULO(rp(:)/dr(:),ng(:))+1
      !   hence (0.0,0.0,0.0) in real space is rsgrid%lb on the rsgrid ((1,1,1) on grid)
      !
      ALLOCATE (map(-cmax:cmax, 3))
      map(:, :) = -1
      CALL compute_cube_center(cubecenter, rsgrid%desc, zeta, zetb, ra, rab)
      roffset(:) = rp(:) - REAL(cubecenter(:), dp)*dr(:)

      !   *** a mapping so that the ig corresponds to the right grid point
      DO i = 1, 3
         IF (rsgrid%desc%perd(i) == 1) THEN
            start = lb_cube(i)
            DO
               offset = MODULO(cubecenter(i) + start, ng(i)) + 1 - start
               length = MIN(ub_cube(i), ng(i) - offset) - start
               DO ig = start, start + length
                  map(ig, i) = ig + offset
               END DO
               IF (start + length >= ub_cube(i)) EXIT
               start = start + length + 1
            END DO
         ELSE
            ! this takes partial grid + border regions into account
            offset = MODULO(cubecenter(i) + lb_cube(i) + rsgrid%desc%lb(i) - rsgrid%lb_local(i), ng(i)) + 1 - lb_cube(i)
            ! check for out of bounds
            IF (ub_cube(i) + offset > UBOUND(grid, i) .OR. lb_cube(i) + offset < LBOUND(grid, i)) THEN
               CPASSERT(.FALSE.)
            END IF
            DO ig = lb_cube(i), ub_cube(i)
               map(ig, i) = ig + offset
            END DO
         END IF
      END DO

      ! *** actually loop over the grid
      sci = 1
      kgmin = sphere_bounds(sci)
      sci = sci + 1
      DO kg = kgmin, 0
         kg2 = 1 - kg
         k = map(kg, 3)
         k2 = map(kg2, 3)
         jgmin = sphere_bounds(sci)
         sci = sci + 1
         z = (REAL(kg, dp) + REAL(cubecenter(3), dp))*dr(3)
         z2 = (REAL(kg2, dp) + REAL(cubecenter(3), dp))*dr(3)
         DO jg = jgmin, 0
            jg2 = 1 - jg
            j = map(jg, 2)
            j2 = map(jg2, 2)
            igmin = sphere_bounds(sci)
            sci = sci + 1
            igmax = 1 - igmin
            y = (REAL(jg, dp) + REAL(cubecenter(2), dp))*dr(2)
            y2 = (REAL(jg2, dp) + REAL(cubecenter(2), dp))*dr(2)
            DO ig = igmin, igmax
               i = map(ig, 1)
               x = (REAL(ig, dp) + REAL(cubecenter(1), dp))*dr(1)
               point(1, 1) = x; point(2, 1) = y; point(3, 1) = z
               point(1, 2) = x; point(2, 2) = y2; point(3, 2) = z
               point(1, 3) = x; point(2, 3) = y; point(3, 3) = z2
               point(1, 4) = x; point(2, 4) = y2; point(3, 4) = z2
               !
               res(1) = (point(ir, 1) - rb(ir)) - gauge(i, j, k)
               res(2) = (point(ir, 2) - rb(ir)) - gauge(i, j2, k)
               res(3) = (point(ir, 3) - rb(ir)) - gauge(i, j, k2)
               res(4) = (point(ir, 4) - rb(ir)) - gauge(i, j2, k2)
               !
               grid_buf(i, j, k) = grid_buf(i, j, k) + grid(i, j, k)*res(1)
               grid_buf(i, j2, k) = grid_buf(i, j2, k) + grid(i, j2, k)*res(2)
               grid_buf(i, j, k2) = grid_buf(i, j, k2) + grid(i, j, k2)*res(3)
               grid_buf(i, j2, k2) = grid_buf(i, j2, k2) + grid(i, j2, k2)*res(4)
            END DO
         END DO
      END DO
   END SUBROUTINE collocate_gauge_ortho

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
! **************************************************************************************************
   SUBROUTINE current_set_gauge(current_env, qs_env)
      !
      TYPE(current_env_type)                   :: current_env
      TYPE(qs_environment_type), POINTER       :: qs_env

      CHARACTER(LEN=*), PARAMETER :: routineN = 'current_set_gauge'

      INTEGER :: idir
      REAL(dp)                                 :: dbox(3)
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)    :: box_data
      INTEGER                                  :: handle, igrid_level, nbox(3), gauge
      INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: box_ptr
      TYPE(realspace_grid_desc_p_type), DIMENSION(:), &
         POINTER                                :: rs_descs
      TYPE(pw_env_type), POINTER               :: pw_env
      TYPE(realspace_grid_type), DIMENSION(:, :), POINTER :: rs_gauge

      TYPE(box_type), DIMENSION(:, :, :), POINTER :: box
      LOGICAL                                   :: use_old_gauge_atom

      NULLIFY (rs_gauge, box)

      CALL timeset(routineN, handle)

      CALL get_current_env(current_env=current_env, &
                           use_old_gauge_atom=use_old_gauge_atom, &
                           rs_gauge=rs_gauge, &
                           gauge=gauge)

      IF (gauge == current_gauge_atom) THEN
         CALL get_qs_env(qs_env=qs_env, &
                         pw_env=pw_env)
         CALL pw_env_get(pw_env=pw_env, &
                         rs_descs=rs_descs)
         !
         ! box the atoms
         IF (use_old_gauge_atom) THEN
            CALL box_atoms(qs_env)
         ELSE
            CALL box_atoms_new(current_env, qs_env, box)
         END IF
         !
         ! allocate and build the gauge
         DO igrid_level = pw_env%gridlevel_info%ngrid_levels, 1, -1

            DO idir = 1, 3
               CALL rs_grid_create(rs_gauge(idir, igrid_level), rs_descs(igrid_level)%rs_desc)
            END DO

            IF (use_old_gauge_atom) THEN
               CALL collocate_gauge(current_env, qs_env, &
                                    rs_gauge(1, igrid_level), &
                                    rs_gauge(2, igrid_level), &
                                    rs_gauge(3, igrid_level))
            ELSE
               CALL collocate_gauge_new(current_env, qs_env, &
                                        rs_gauge(1, igrid_level), &
                                        rs_gauge(2, igrid_level), &
                                        rs_gauge(3, igrid_level), &
                                        box)
            END IF
         END DO
         !
         ! allocate the buf
         ALLOCATE (current_env%rs_buf(pw_env%gridlevel_info%ngrid_levels))
         DO igrid_level = 1, pw_env%gridlevel_info%ngrid_levels
            CALL rs_grid_create(current_env%rs_buf(igrid_level), rs_descs(igrid_level)%rs_desc)
         END DO
         !
         DEALLOCATE (box_ptr, box_data)
         CALL deallocate_box(box)
      END IF

      CALL timestop(handle)

   CONTAINS

! **************************************************************************************************
!> \brief ...
!> \param qs_env ...
! **************************************************************************************************
      SUBROUTINE box_atoms(qs_env)
      TYPE(qs_environment_type), POINTER                 :: qs_env

      REAL(kind=dp), PARAMETER                           :: box_size_guess = 5.0_dp

      INTEGER                                            :: i, iatom, ibox, ii, jbox, kbox, natms
      REAL(dp)                                           :: offset(3)
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: ratom
      TYPE(cell_type), POINTER                           :: cell
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set

         CALL get_qs_env(qs_env=qs_env, &
                         qs_kind_set=qs_kind_set, &
                         cell=cell, &
                         particle_set=particle_set)

         natms = SIZE(particle_set, 1)
         ALLOCATE (ratom(3, natms))
         !
         ! box the atoms
         nbox(1) = CEILING(cell%hmat(1, 1)/box_size_guess)
         nbox(2) = CEILING(cell%hmat(2, 2)/box_size_guess)
         nbox(3) = CEILING(cell%hmat(3, 3)/box_size_guess)
         !write(*,*) 'nbox',nbox
         dbox(1) = cell%hmat(1, 1)/REAL(nbox(1), dp)
         dbox(2) = cell%hmat(2, 2)/REAL(nbox(2), dp)
         dbox(3) = cell%hmat(3, 3)/REAL(nbox(3), dp)
         !write(*,*) 'dbox',dbox
         ALLOCATE (box_ptr(0:nbox(1), 0:nbox(2) - 1, 0:nbox(3) - 1), box_data(3, natms))
         box_data(:, :) = HUGE(0.0_dp)
         box_ptr(:, :, :) = HUGE(0)
         !
         offset(1) = cell%hmat(1, 1)*0.5_dp
         offset(2) = cell%hmat(2, 2)*0.5_dp
         offset(3) = cell%hmat(3, 3)*0.5_dp
         DO iatom = 1, natms
            ratom(:, iatom) = pbc(particle_set(iatom)%r(:), cell) + offset(:)
         END DO
         !
         i = 1
         DO kbox = 0, nbox(3) - 1
         DO jbox = 0, nbox(2) - 1
            box_ptr(0, jbox, kbox) = i
            DO ibox = 0, nbox(1) - 1
               ii = 0
               DO iatom = 1, natms
                  IF (INT(ratom(1, iatom)/dbox(1)) == ibox .AND. &
                      INT(ratom(2, iatom)/dbox(2)) == jbox .AND. &
                      INT(ratom(3, iatom)/dbox(3)) == kbox) THEN
                     box_data(:, i) = ratom(:, iatom) - offset(:)
                     i = i + 1
                     ii = ii + 1
                  END IF
               END DO
               box_ptr(ibox + 1, jbox, kbox) = box_ptr(ibox, jbox, kbox) + ii
            END DO
         END DO
         END DO
         !
         IF (.FALSE.) THEN
            DO kbox = 0, nbox(3) - 1
            DO jbox = 0, nbox(2) - 1
            DO ibox = 0, nbox(1) - 1
               WRITE (*, *) 'box=', ibox, jbox, kbox
               WRITE (*, *) 'nbr atom=', box_ptr(ibox + 1, jbox, kbox) - box_ptr(ibox, jbox, kbox)
               DO iatom = box_ptr(ibox, jbox, kbox), box_ptr(ibox + 1, jbox, kbox) - 1
                  WRITE (*, *) 'iatom=', iatom
                  WRITE (*, '(A,3E14.6)') 'coor=', box_data(:, iatom)
               END DO
            END DO
            END DO
            END DO
         END IF
         DEALLOCATE (ratom)
      END SUBROUTINE box_atoms

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
!> \param rs_grid_x ...
!> \param rs_grid_y ...
!> \param rs_grid_z ...
! **************************************************************************************************
      SUBROUTINE collocate_gauge(current_env, qs_env, rs_grid_x, rs_grid_y, rs_grid_z)
         !
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(realspace_grid_type), INTENT(IN)              :: rs_grid_x, rs_grid_y, rs_grid_z

      INTEGER                                            :: i, iatom, ibeg, ibox, iend, imax, imin, &
                                                            j, jatom, jbox, jmax, jmin, k, kbox, &
                                                            kmax, kmin, lb(3), lb_local(3), natms, &
                                                            natms_local, ng(3)
      REAL(KIND=dp)                                      :: ab, buf_tmp, dist, dr(3), &
                                                            gauge_atom_radius, offset(3), pa, pb, &
                                                            point(3), pra(3), r(3), res(3), summe, &
                                                            tmp, x, y, z
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: buf, nrm_atms_pnt
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :)        :: atms_pnt, ratom
      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: grid_x, grid_y, grid_z
      TYPE(cell_type), POINTER                           :: cell
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set

!

         CALL get_current_env(current_env=current_env, &
                              gauge_atom_radius=gauge_atom_radius)
         !
         CALL get_qs_env(qs_env=qs_env, &
                         qs_kind_set=qs_kind_set, &
                         cell=cell, &
                         particle_set=particle_set)
         !
         natms = SIZE(particle_set, 1)
         dr(1) = rs_grid_x%desc%dh(1, 1)
         dr(2) = rs_grid_x%desc%dh(2, 2)
         dr(3) = rs_grid_x%desc%dh(3, 3)
         lb(:) = rs_grid_x%desc%lb(:)
         lb_local(:) = rs_grid_x%lb_local(:)
         grid_x => rs_grid_x%r(:, :, :)
         grid_y => rs_grid_y%r(:, :, :)
         grid_z => rs_grid_z%r(:, :, :)
         ng(:) = UBOUND(grid_x)
         offset(1) = cell%hmat(1, 1)*0.5_dp
         offset(2) = cell%hmat(2, 2)*0.5_dp
         offset(3) = cell%hmat(3, 3)*0.5_dp
         ALLOCATE (buf(natms), ratom(3, natms), atms_pnt(3, natms), nrm_atms_pnt(natms))
         !
         ! go over the grid
         DO k = 1, ng(3)
            DO j = 1, ng(2)
               DO i = 1, ng(1)
                  !
                  point(3) = REAL(k - 1 + lb_local(3) - lb(3), dp)*dr(3)
                  point(2) = REAL(j - 1 + lb_local(2) - lb(2), dp)*dr(2)
                  point(1) = REAL(i - 1 + lb_local(1) - lb(1), dp)*dr(1)
                  point = pbc(point, cell)
                  !
                  ! run over the overlaping boxes
                  natms_local = 0
                  kmin = INT((point(3) + offset(3) - gauge_atom_radius)/dbox(3))
                  kmax = INT((point(3) + offset(3) + gauge_atom_radius)/dbox(3))
                  IF (kmax - kmin + 1 > nbox(3)) THEN
                     kmin = 0
                     kmax = nbox(3) - 1
                  END IF
                  DO kbox = kmin, kmax
                     jmin = INT((point(2) + offset(2) - gauge_atom_radius)/dbox(2))
                     jmax = INT((point(2) + offset(2) + gauge_atom_radius)/dbox(2))
                     IF (jmax - jmin + 1 > nbox(2)) THEN
                        jmin = 0
                        jmax = nbox(2) - 1
                     END IF
                     DO jbox = jmin, jmax
                        imin = INT((point(1) + offset(1) - gauge_atom_radius)/dbox(1))
                        imax = INT((point(1) + offset(1) + gauge_atom_radius)/dbox(1))
                        IF (imax - imin + 1 > nbox(1)) THEN
                           imin = 0
                           imax = nbox(1) - 1
                        END IF
                        DO ibox = imin, imax
                           ibeg = box_ptr(MODULO(ibox, nbox(1)), MODULO(jbox, nbox(2)), MODULO(kbox, nbox(3)))
                           iend = box_ptr(MODULO(ibox, nbox(1)) + 1, MODULO(jbox, nbox(2)), MODULO(kbox, nbox(3))) - 1
                           DO iatom = ibeg, iend
                              r(:) = pbc(box_data(:, iatom) - point(:), cell) + point(:)
                              dist = (r(1) - point(1))**2 + (r(2) - point(2))**2 + (r(3) - point(3))**2
                              IF (dist < gauge_atom_radius**2) THEN
                                 natms_local = natms_local + 1
                                 ratom(:, natms_local) = r(:)
                                 !
                                 ! compute the distance atoms-point
                                 x = point(1) - r(1)
                                 y = point(2) - r(2)
                                 z = point(3) - r(3)
                                 atms_pnt(1, natms_local) = x
                                 atms_pnt(2, natms_local) = y
                                 atms_pnt(3, natms_local) = z
                                 nrm_atms_pnt(natms_local) = SQRT(x*x + y*y + z*z)
                              END IF
                           END DO
                        END DO
                     END DO
                  END DO
                  !
                  IF (natms_local > 0) THEN
                     !
                     !
                     DO iatom = 1, natms_local
                        buf_tmp = 1.0_dp
                        pra(1) = atms_pnt(1, iatom)
                        pra(2) = atms_pnt(2, iatom)
                        pra(3) = atms_pnt(3, iatom)
                        pa = nrm_atms_pnt(iatom)
                        DO jatom = 1, natms_local
                           IF (iatom == jatom) CYCLE
                           pb = nrm_atms_pnt(jatom)
                           x = pra(1) - atms_pnt(1, jatom)
                           y = pra(2) - atms_pnt(2, jatom)
                           z = pra(3) - atms_pnt(3, jatom)
                           ab = SQRT(x*x + y*y + z*z)
                           !
                           tmp = (pa - pb)/ab
                           tmp = 0.5_dp*(3.0_dp - tmp*tmp)*tmp
                           tmp = 0.5_dp*(3.0_dp - tmp*tmp)*tmp
                           tmp = 0.5_dp*(3.0_dp - tmp*tmp)*tmp
                           buf_tmp = buf_tmp*0.5_dp*(1.0_dp - tmp)
                        END DO
                        buf(iatom) = buf_tmp
                     END DO
                     res(1) = 0.0_dp
                     res(2) = 0.0_dp
                     res(3) = 0.0_dp
                     summe = 0.0_dp
                     DO iatom = 1, natms_local
                        res(1) = res(1) + ratom(1, iatom)*buf(iatom)
                        res(2) = res(2) + ratom(2, iatom)*buf(iatom)
                        res(3) = res(3) + ratom(3, iatom)*buf(iatom)
                        summe = summe + buf(iatom)
                     END DO
                     res(1) = res(1)/summe
                     res(2) = res(2)/summe
                     res(3) = res(3)/summe
                     grid_x(i, j, k) = point(1) - res(1)
                     grid_y(i, j, k) = point(2) - res(2)
                     grid_z(i, j, k) = point(3) - res(3)
                  ELSE
                     grid_x(i, j, k) = 0.0_dp
                     grid_y(i, j, k) = 0.0_dp
                     grid_z(i, j, k) = 0.0_dp
                  END IF
               END DO
            END DO
         END DO

         DEALLOCATE (buf, ratom, atms_pnt, nrm_atms_pnt)

      END SUBROUTINE collocate_gauge

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
!> \param box ...
! **************************************************************************************************
      SUBROUTINE box_atoms_new(current_env, qs_env, box)
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(box_type), DIMENSION(:, :, :), POINTER        :: box

      CHARACTER(LEN=*), PARAMETER                        :: routineN = 'box_atoms_new'

      INTEGER                                            :: handle, i, iatom, ibeg, ibox, iend, &
                                                            ifind, ii, imax, imin, j, jatom, jbox, &
                                                            jmax, jmin, k, kbox, kmax, kmin, &
                                                            natms, natms_local
      REAL(dp)                                           :: gauge_atom_radius, offset(3), scale
      REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: ratom
      REAL(dp), DIMENSION(:, :), POINTER                 :: r_ptr
      REAL(kind=dp)                                      :: box_center(3), box_center_wrap(3), &
                                                            box_size_guess, r(3)
      TYPE(cell_type), POINTER                           :: cell
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set

         CALL timeset(routineN, handle)

         CALL get_qs_env(qs_env=qs_env, &
                         qs_kind_set=qs_kind_set, &
                         cell=cell, &
                         particle_set=particle_set)

         CALL get_current_env(current_env=current_env, &
                              gauge_atom_radius=gauge_atom_radius)

         scale = 2.0_dp

         box_size_guess = gauge_atom_radius/scale

         natms = SIZE(particle_set, 1)
         ALLOCATE (ratom(3, natms))

         !
         ! box the atoms
         nbox(1) = CEILING(cell%hmat(1, 1)/box_size_guess)
         nbox(2) = CEILING(cell%hmat(2, 2)/box_size_guess)
         nbox(3) = CEILING(cell%hmat(3, 3)/box_size_guess)
         dbox(1) = cell%hmat(1, 1)/REAL(nbox(1), dp)
         dbox(2) = cell%hmat(2, 2)/REAL(nbox(2), dp)
         dbox(3) = cell%hmat(3, 3)/REAL(nbox(3), dp)
         ALLOCATE (box_ptr(0:nbox(1), 0:nbox(2) - 1, 0:nbox(3) - 1), box_data(3, natms))
         box_data(:, :) = HUGE(0.0_dp)
         box_ptr(:, :, :) = HUGE(0)
         !
         offset(1) = cell%hmat(1, 1)*0.5_dp
         offset(2) = cell%hmat(2, 2)*0.5_dp
         offset(3) = cell%hmat(3, 3)*0.5_dp
         DO iatom = 1, natms
            ratom(:, iatom) = pbc(particle_set(iatom)%r(:), cell)
         END DO
         !
         i = 1
         DO kbox = 0, nbox(3) - 1
         DO jbox = 0, nbox(2) - 1
            box_ptr(0, jbox, kbox) = i
            DO ibox = 0, nbox(1) - 1
               ii = 0
               DO iatom = 1, natms
                  IF (MODULO(FLOOR(ratom(1, iatom)/dbox(1)), nbox(1)) == ibox .AND. &
                      MODULO(FLOOR(ratom(2, iatom)/dbox(2)), nbox(2)) == jbox .AND. &
                      MODULO(FLOOR(ratom(3, iatom)/dbox(3)), nbox(3)) == kbox) THEN
                     box_data(:, i) = ratom(:, iatom)
                     i = i + 1
                     ii = ii + 1
                  END IF
               END DO
               box_ptr(ibox + 1, jbox, kbox) = box_ptr(ibox, jbox, kbox) + ii
            END DO
         END DO
         END DO
         !
         IF (.FALSE.) THEN
            DO kbox = 0, nbox(3) - 1
            DO jbox = 0, nbox(2) - 1
            DO ibox = 0, nbox(1) - 1
               IF (box_ptr(ibox + 1, jbox, kbox) - box_ptr(ibox, jbox, kbox) > 0) THEN
                  WRITE (*, *) 'box=', ibox, jbox, kbox
                  WRITE (*, *) 'nbr atom=', box_ptr(ibox + 1, jbox, kbox) - box_ptr(ibox, jbox, kbox)
                  DO iatom = box_ptr(ibox, jbox, kbox), box_ptr(ibox + 1, jbox, kbox) - 1
                     WRITE (*, '(A,I3,3E14.6)') 'coor=', iatom, box_data(:, iatom)
                  END DO
               END IF
            END DO
            END DO
            END DO
         END IF
         !
         NULLIFY (box)
         ALLOCATE (box(0:nbox(1) - 1, 0:nbox(2) - 1, 0:nbox(3) - 1))
         !
         ! build the list
         DO k = 0, nbox(3) - 1
         DO j = 0, nbox(2) - 1
         DO i = 0, nbox(1) - 1
            !
            box_center(1) = (REAL(i, dp) + 0.5_dp)*dbox(1)
            box_center(2) = (REAL(j, dp) + 0.5_dp)*dbox(2)
            box_center(3) = (REAL(k, dp) + 0.5_dp)*dbox(3)
            box_center_wrap = pbc(box_center, cell)
            !
            ! find the atoms that are in the overlaping boxes
            natms_local = 0
            kmin = FLOOR((box_center(3) - gauge_atom_radius)/dbox(3))
            kmax = FLOOR((box_center(3) + gauge_atom_radius)/dbox(3))
            IF (kmax - kmin + 1 > nbox(3)) THEN
               kmin = 0
               kmax = nbox(3) - 1
            END IF
            DO kbox = kmin, kmax
               jmin = FLOOR((box_center(2) - gauge_atom_radius)/dbox(2))
               jmax = FLOOR((box_center(2) + gauge_atom_radius)/dbox(2))
               IF (jmax - jmin + 1 > nbox(2)) THEN
                  jmin = 0
                  jmax = nbox(2) - 1
               END IF
               DO jbox = jmin, jmax
                  imin = FLOOR((box_center(1) - gauge_atom_radius)/dbox(1))
                  imax = FLOOR((box_center(1) + gauge_atom_radius)/dbox(1))
                  IF (imax - imin + 1 > nbox(1)) THEN
                     imin = 0
                     imax = nbox(1) - 1
                  END IF
                  DO ibox = imin, imax
                     ibeg = box_ptr(MODULO(ibox, nbox(1)), MODULO(jbox, nbox(2)), MODULO(kbox, nbox(3)))
                     iend = box_ptr(MODULO(ibox, nbox(1)) + 1, MODULO(jbox, nbox(2)), MODULO(kbox, nbox(3))) - 1
                     DO iatom = ibeg, iend
                        r = pbc(box_center_wrap(:) - box_data(:, iatom), cell)
                        IF (ABS(r(1)) <= (scale + 0.5_dp)*dbox(1) .AND. &
                            ABS(r(2)) <= (scale + 0.5_dp)*dbox(2) .AND. &
                            ABS(r(3)) <= (scale + 0.5_dp)*dbox(3)) THEN
                           natms_local = natms_local + 1
                           ratom(:, natms_local) = box_data(:, iatom)
                        END IF
                     END DO
                  END DO ! box
               END DO
            END DO
            !
            ! set the list
            box(i, j, k)%n = natms_local
            NULLIFY (box(i, j, k)%r)
            IF (natms_local > 0) THEN
               ALLOCATE (box(i, j, k)%r(3, natms_local))
               r_ptr => box(i, j, k)%r
               CALL dcopy(3*natms_local, ratom(1, 1), 1, r_ptr(1, 1), 1)
            END IF
         END DO ! list
         END DO
         END DO

         IF (.FALSE.) THEN
            DO k = 0, nbox(3) - 1
            DO j = 0, nbox(2) - 1
            DO i = 0, nbox(1) - 1
               IF (box(i, j, k)%n > 0) THEN
                  WRITE (*, *)
                  WRITE (*, *) 'box=', i, j, k
                  box_center(1) = (REAL(i, dp) + 0.5_dp)*dbox(1)
                  box_center(2) = (REAL(j, dp) + 0.5_dp)*dbox(2)
                  box_center(3) = (REAL(k, dp) + 0.5_dp)*dbox(3)
                  box_center = pbc(box_center, cell)
                  WRITE (*, '(A,3E14.6)') 'box_center=', box_center
                  WRITE (*, *) 'nbr atom=', box(i, j, k)%n
                  r_ptr => box(i, j, k)%r
                  DO iatom = 1, box(i, j, k)%n
                     WRITE (*, '(A,I3,3E14.6)') 'coor=', iatom, r_ptr(:, iatom)
                     r(:) = pbc(box_center(:) - r_ptr(:, iatom), cell)
                     IF (ABS(r(1)) > (scale + 0.5_dp)*dbox(1) .OR. &
                         ABS(r(2)) > (scale + 0.5_dp)*dbox(2) .OR. &
                         ABS(r(3)) > (scale + 0.5_dp)*dbox(3)) THEN
                        WRITE (*, *) 'error too many atoms'
                        WRITE (*, *) 'dist=', ABS(r(:))
                        WRITE (*, *) 'large_dist=', (scale + 0.5_dp)*dbox
                        CPABORT("")
                     END IF
                  END DO
               END IF
            END DO ! list
            END DO
            END DO
         END IF

         IF (.TRUE.) THEN
            DO k = 0, nbox(3) - 1
            DO j = 0, nbox(2) - 1
            DO i = 0, nbox(1) - 1
               box_center(1) = (REAL(i, dp) + 0.5_dp)*dbox(1)
               box_center(2) = (REAL(j, dp) + 0.5_dp)*dbox(2)
               box_center(3) = (REAL(k, dp) + 0.5_dp)*dbox(3)
               box_center = pbc(box_center, cell)
               r_ptr => box(i, j, k)%r
               DO iatom = 1, natms
                  r(:) = pbc(box_center(:) - ratom(:, iatom), cell)
                  ifind = 0
                  DO jatom = 1, box(i, j, k)%n
                     IF (SUM(ABS(ratom(:, iatom) - r_ptr(:, jatom))) < 1E-10_dp) ifind = 1
                  END DO

                  IF (ifind == 0) THEN
                     ! SQRT(DOT_PRODUCT(r, r)) < gauge_atom_radius
                     IF (DOT_PRODUCT(r, r) < (gauge_atom_radius*gauge_atom_radius)) THEN
                        WRITE (*, *) 'error atom too close'
                        WRITE (*, *) 'iatom', iatom
                        WRITE (*, *) 'box_center', box_center
                        WRITE (*, *) 'ratom', ratom(:, iatom)
                        WRITE (*, *) 'gauge_atom_radius', gauge_atom_radius
                        CPABORT("")
                     END IF
                  END IF
               END DO
            END DO ! list
            END DO
            END DO
         END IF

         DEALLOCATE (ratom)

         CALL timestop(handle)

      END SUBROUTINE box_atoms_new

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
!> \param rs_grid_x ...
!> \param rs_grid_y ...
!> \param rs_grid_z ...
!> \param box ...
! **************************************************************************************************
      SUBROUTINE collocate_gauge_new(current_env, qs_env, rs_grid_x, rs_grid_y, rs_grid_z, box)
         !
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      TYPE(realspace_grid_type), INTENT(IN)              :: rs_grid_x, rs_grid_y, rs_grid_z
      TYPE(box_type), DIMENSION(:, :, :), POINTER        :: box

      CHARACTER(LEN=*), PARAMETER :: routineN = 'collocate_gauge_new'

      INTEGER :: delta_lb(3), handle, i, iatom, ib, ibe, ibox, ibs, ie, is, j, jatom, jb, jbe, &
         jbox, jbs, je, js, k, kb, kbe, kbox, kbs, ke, ks, lb(3), lb_local(3), natms, &
         natms_local0, natms_local1, ng(3)
      REAL(dp), DIMENSION(:, :), POINTER                 :: r_ptr
      REAL(KIND=dp)                                      :: ab, box_center(3), buf_tmp, dist, dr(3), &
                                                            gauge_atom_radius, offset(3), pa, pb, &
                                                            point(3), pra(3), r(3), res(3), summe, &
                                                            tmp, x, y, z
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: buf, nrm_atms_pnt
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :)        :: atms_pnt, ratom
      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: grid_x, grid_y, grid_z
      TYPE(cell_type), POINTER                           :: cell
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set

         CALL timeset(routineN, handle)

!
         CALL get_current_env(current_env=current_env, &
                              gauge_atom_radius=gauge_atom_radius)
         !
         CALL get_qs_env(qs_env=qs_env, &
                         qs_kind_set=qs_kind_set, &
                         cell=cell, &
                         particle_set=particle_set)
         !
         natms = SIZE(particle_set, 1)
         dr(1) = rs_grid_x%desc%dh(1, 1)
         dr(2) = rs_grid_x%desc%dh(2, 2)
         dr(3) = rs_grid_x%desc%dh(3, 3)
         lb(:) = rs_grid_x%desc%lb(:)
         lb_local(:) = rs_grid_x%lb_local(:)
         grid_x => rs_grid_x%r(:, :, :)
         grid_y => rs_grid_y%r(:, :, :)
         grid_z => rs_grid_z%r(:, :, :)
         ng(:) = UBOUND(grid_x)
         delta_lb(:) = lb_local(:) - lb(:)
         offset(1) = cell%hmat(1, 1)*0.5_dp
         offset(2) = cell%hmat(2, 2)*0.5_dp
         offset(3) = cell%hmat(3, 3)*0.5_dp
         ALLOCATE (buf(natms), ratom(3, natms), atms_pnt(3, natms), nrm_atms_pnt(natms))
         !
         ! find the boxes that match the grid
         ibs = FLOOR(REAL(delta_lb(1), dp)*dr(1)/dbox(1))
         ibe = FLOOR(REAL(ng(1) - 1 + delta_lb(1), dp)*dr(1)/dbox(1))
         jbs = FLOOR(REAL(delta_lb(2), dp)*dr(2)/dbox(2))
         jbe = FLOOR(REAL(ng(2) - 1 + delta_lb(2), dp)*dr(2)/dbox(2))
         kbs = FLOOR(REAL(delta_lb(3), dp)*dr(3)/dbox(3))
         kbe = FLOOR(REAL(ng(3) - 1 + delta_lb(3), dp)*dr(3)/dbox(3))
         !
         ! go over the box-list
         DO kb = kbs, kbe
         DO jb = jbs, jbe
         DO ib = ibs, ibe
            ibox = MODULO(ib, nbox(1))
            jbox = MODULO(jb, nbox(2))
            kbox = MODULO(kb, nbox(3))
            !
            is = MAX(CEILING(REAL(ib, dp)*dbox(1)/dr(1)), delta_lb(1)) - delta_lb(1) + 1
            ie = MIN(FLOOR(REAL(ib + 1, dp)*dbox(1)/dr(1)), ng(1) - 1 + delta_lb(1)) - delta_lb(1) + 1
            js = MAX(CEILING(REAL(jb, dp)*dbox(2)/dr(2)), delta_lb(2)) - delta_lb(2) + 1
            je = MIN(FLOOR(REAL(jb + 1, dp)*dbox(2)/dr(2)), ng(2) - 1 + delta_lb(2)) - delta_lb(2) + 1
            ks = MAX(CEILING(REAL(kb, dp)*dbox(3)/dr(3)), delta_lb(3)) - delta_lb(3) + 1
            ke = MIN(FLOOR(REAL(kb + 1, dp)*dbox(3)/dr(3)), ng(3) - 1 + delta_lb(3)) - delta_lb(3) + 1
            !
            ! sanity checks
            IF (.TRUE.) THEN
               IF (REAL(ks - 1 + delta_lb(3), dp)*dr(3) < REAL(kb, dp)*dbox(3) .OR. &
                   REAL(ke - 1 + delta_lb(3), dp)*dr(3) > REAL(kb + 1, dp)*dbox(3)) THEN
                  WRITE (*, *) 'box_k', REAL(kb, dp)*dbox(3), REAL(kb + 1, dp)*dbox(3)
                  WRITE (*, *) 'point_k', REAL(ks - 1 + delta_lb(3), dp)*dr(3), REAL(ke - 1 + delta_lb(3), dp)*dr(3)
                  WRITE (*, *) 'ibox', ibox, 'jbox', jbox, 'kbox', kbox
                  WRITE (*, *) 'is,ie', is, ie, ' js,je', js, je, ' ks,ke', ks, ke
                  WRITE (*, *) 'ibs,ibe', ibs, ibe, ' jbs,jbe', jbs, jbe, ' kbs,kbe', kbs, kbe
                  CPABORT("we stop_k")
               END IF
               IF (REAL(js - 1 + delta_lb(2), dp)*dr(2) < REAL(jb, dp)*dbox(2) .OR. &
                   REAL(je - 1 + delta_lb(2), dp)*dr(2) > REAL(jb + 1, dp)*dbox(2)) THEN
                  WRITE (*, *) 'box_j', REAL(jb, dp)*dbox(2), REAL(jb + 1, dp)*dbox(2)
                  WRITE (*, *) 'point_j', REAL(js - 1 + delta_lb(2), dp)*dr(2), REAL(je - 1 + delta_lb(2), dp)*dr(2)
                  WRITE (*, *) 'is,ie', is, ie, ' js,je', js, je, ' ks,ke', ks, ke
                  WRITE (*, *) 'ibs,ibe', ibs, ibe, ' jbs,jbe', jbs, jbe, ' kbs,kbe', kbs, kbe
                  CPABORT("we stop_j")
               END IF
               IF (REAL(is - 1 + delta_lb(1), dp)*dr(1) < REAL(ib, dp)*dbox(1) .OR. &
                   REAL(ie - 1 + delta_lb(1), dp)*dr(1) > REAL(ib + 1, dp)*dbox(1)) THEN
                  WRITE (*, *) 'box_i', REAL(ib, dp)*dbox(1), REAL(ib + 1, dp)*dbox(1)
                  WRITE (*, *) 'point_i', REAL(is - 1 + delta_lb(1), dp)*dr(1), REAL(ie - 1 + delta_lb(1), dp)*dr(1)
                  WRITE (*, *) 'is,ie', is, ie, ' js,je', js, je, ' ks,ke', ks, ke
                  WRITE (*, *) 'ibs,ibe', ibs, ibe, ' jbs,jbe', jbs, jbe, ' kbs,kbe', kbs, kbe
                  CPABORT("we stop_i")
               END IF
            END IF
            !
            ! the center of the box
            box_center(1) = (REAL(ibox, dp) + 0.5_dp)*dbox(1)
            box_center(2) = (REAL(jbox, dp) + 0.5_dp)*dbox(2)
            box_center(3) = (REAL(kbox, dp) + 0.5_dp)*dbox(3)
            !
            ! find the atoms that are in the overlaping boxes
            natms_local0 = box(ibox, jbox, kbox)%n
            r_ptr => box(ibox, jbox, kbox)%r
            !
            ! go over the grid inside the box
            IF (natms_local0 > 0) THEN
               !
               ! here there are some atoms...
               DO k = ks, ke
               DO j = js, je
               DO i = is, ie
                  point(1) = REAL(i - 1 + delta_lb(1), dp)*dr(1)
                  point(2) = REAL(j - 1 + delta_lb(2), dp)*dr(2)
                  point(3) = REAL(k - 1 + delta_lb(3), dp)*dr(3)
                  point = pbc(point, cell)
                  !
                  ! compute atom-point distances
                  natms_local1 = 0
                  DO iatom = 1, natms_local0
                     r(:) = pbc(r_ptr(:, iatom) - point(:), cell) + point(:) !needed?
                     dist = (r(1) - point(1))**2 + (r(2) - point(2))**2 + (r(3) - point(3))**2
                     IF (dist < gauge_atom_radius**2) THEN
                        natms_local1 = natms_local1 + 1
                        ratom(:, natms_local1) = r(:)
                        !
                        ! compute the distance atoms-point
                        x = point(1) - r(1)
                        y = point(2) - r(2)
                        z = point(3) - r(3)
                        atms_pnt(1, natms_local1) = x
                        atms_pnt(2, natms_local1) = y
                        atms_pnt(3, natms_local1) = z
                        nrm_atms_pnt(natms_local1) = SQRT(x*x + y*y + z*z)
                     END IF
                  END DO
                  !
                  !
                  IF (natms_local1 > 0) THEN
                     !
                     ! build the step
                     DO iatom = 1, natms_local1
                        buf_tmp = 1.0_dp
                        pra(1) = atms_pnt(1, iatom)
                        pra(2) = atms_pnt(2, iatom)
                        pra(3) = atms_pnt(3, iatom)
                        pa = nrm_atms_pnt(iatom)
                        DO jatom = 1, natms_local1
                           IF (iatom == jatom) CYCLE
                           pb = nrm_atms_pnt(jatom)
                           x = pra(1) - atms_pnt(1, jatom)
                           y = pra(2) - atms_pnt(2, jatom)
                           z = pra(3) - atms_pnt(3, jatom)
                           ab = SQRT(x*x + y*y + z*z)
                           !
                           tmp = (pa - pb)/ab
                           tmp = 0.5_dp*(3.0_dp - tmp*tmp)*tmp
                           tmp = 0.5_dp*(3.0_dp - tmp*tmp)*tmp
                           tmp = 0.5_dp*(3.0_dp - tmp*tmp)*tmp
                           buf_tmp = buf_tmp*0.5_dp*(1.0_dp - tmp)
                        END DO
                        buf(iatom) = buf_tmp
                     END DO
                     res(1) = 0.0_dp
                     res(2) = 0.0_dp
                     res(3) = 0.0_dp
                     summe = 0.0_dp
                     DO iatom = 1, natms_local1
                        res(1) = res(1) + ratom(1, iatom)*buf(iatom)
                        res(2) = res(2) + ratom(2, iatom)*buf(iatom)
                        res(3) = res(3) + ratom(3, iatom)*buf(iatom)
                        summe = summe + buf(iatom)
                     END DO
                     res(1) = res(1)/summe
                     res(2) = res(2)/summe
                     res(3) = res(3)/summe
                     grid_x(i, j, k) = point(1) - res(1)
                     grid_y(i, j, k) = point(2) - res(2)
                     grid_z(i, j, k) = point(3) - res(3)
                  ELSE
                     grid_x(i, j, k) = 0.0_dp
                     grid_y(i, j, k) = 0.0_dp
                     grid_z(i, j, k) = 0.0_dp
                  END IF
               END DO ! grid
               END DO
               END DO
               !
            ELSE
               !
               ! here there is no atom
               DO k = ks, ke
               DO j = js, je
               DO i = is, ie
                  grid_x(i, j, k) = 0.0_dp
                  grid_y(i, j, k) = 0.0_dp
                  grid_z(i, j, k) = 0.0_dp
               END DO ! grid
               END DO
               END DO
               !
            END IF
            !
         END DO ! list
         END DO
         END DO

         DEALLOCATE (buf, ratom, atms_pnt, nrm_atms_pnt)

         CALL timestop(handle)

      END SUBROUTINE collocate_gauge_new

! **************************************************************************************************
!> \brief ...
!> \param box ...
! **************************************************************************************************
      SUBROUTINE deallocate_box(box)
      TYPE(box_type), DIMENSION(:, :, :), POINTER        :: box

      INTEGER                                            :: i, j, k

         IF (ASSOCIATED(box)) THEN
            DO k = LBOUND(box, 3), UBOUND(box, 3)
            DO j = LBOUND(box, 2), UBOUND(box, 2)
            DO i = LBOUND(box, 1), UBOUND(box, 1)
               IF (ASSOCIATED(box(i, j, k)%r)) THEN
                  DEALLOCATE (box(i, j, k)%r)
               END IF
            END DO
            END DO
            END DO
            DEALLOCATE (box)
         END IF
      END SUBROUTINE deallocate_box
   END SUBROUTINE current_set_gauge

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
!> \param iB ...
! **************************************************************************************************
   SUBROUTINE current_build_chi(current_env, qs_env, iB)
      !
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      INTEGER, INTENT(IN)                                :: iB

      IF (current_env%full) THEN
         CALL current_build_chi_many_centers(current_env, qs_env, iB)
      ELSE IF (current_env%nbr_center(1) > 1) THEN
         CALL current_build_chi_many_centers(current_env, qs_env, iB)
      ELSE
         CALL current_build_chi_one_center(current_env, qs_env, iB)
      END IF

   END SUBROUTINE current_build_chi

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
!> \param iB ...
! **************************************************************************************************
   SUBROUTINE current_build_chi_many_centers(current_env, qs_env, iB)
      !
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      INTEGER, INTENT(IN)                                :: iB

      CHARACTER(LEN=*), PARAMETER :: routineN = 'current_build_chi_many_centers'

      INTEGER :: handle, icenter, idir, idir2, ii, iiB, iii, iiiB, ispin, istate, j, jstate, &
         max_states, nao, natom, nbr_center(2), nmo, nspins, nstate_loc, nstates(2), output_unit
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf
      INTEGER, DIMENSION(:), POINTER                     :: row_blk_sizes
      LOGICAL                                            :: chi_pbc, gapw
      REAL(dp)                                           :: chi(3), chi_tmp, contrib, contrib2, &
                                                            dk(3), int_current(3), &
                                                            int_current_tmp, maxocc
      TYPE(cell_type), POINTER                           :: cell
      TYPE(cp_2d_i_p_type), DIMENSION(:), POINTER        :: center_list
      TYPE(cp_2d_r_p_type), DIMENSION(:), POINTER        :: centers_set
      TYPE(cp_fm_struct_type), POINTER                   :: tmp_fm_struct
      TYPE(cp_fm_type)                                   :: psi0, psi_D, psi_p1, psi_p2, psi_rxp
      TYPE(cp_fm_type), DIMENSION(3)                     :: p_rxp, r_p1, r_p2
      TYPE(cp_fm_type), DIMENSION(9, 3)                  :: rr_p1, rr_p2, rr_rxp
      TYPE(cp_fm_type), DIMENSION(:), POINTER            :: psi0_order
      TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: psi1_D, psi1_p, psi1_rxp
      TYPE(cp_fm_type), POINTER                          :: mo_coeff
      TYPE(cp_logger_type), POINTER                      :: logger
      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: op_mom_ao, op_p_ao
      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_mom_der_ao
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
         POINTER                                         :: sab_all, sab_orb
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set

!

      CALL timeset(routineN, handle)
      !
      NULLIFY (dft_control, mos, para_env, mo_coeff, op_mom_ao, &
               op_mom_der_ao, center_list, centers_set, &
               op_p_ao, psi1_p, psi1_rxp, psi1_D, &
               cell, particle_set, qs_kind_set)

      logger => cp_get_default_logger()
      output_unit = cp_logger_get_default_io_unit(logger)

      CALL get_qs_env(qs_env=qs_env, &
                      dft_control=dft_control, &
                      mos=mos, &
                      para_env=para_env, &
                      cell=cell, &
                      dbcsr_dist=dbcsr_dist, &
                      particle_set=particle_set, &
                      qs_kind_set=qs_kind_set, &
                      sab_all=sab_all, &
                      sab_orb=sab_orb)

      nspins = dft_control%nspins
      gapw = dft_control%qs_control%gapw

      CALL get_current_env(current_env=current_env, &
                           chi_pbc=chi_pbc, &
                           nao=nao, &
                           nbr_center=nbr_center, &
                           center_list=center_list, &
                           centers_set=centers_set, &
                           psi1_p=psi1_p, &
                           psi1_rxp=psi1_rxp, &
                           psi1_D=psi1_D, &
                           nstates=nstates, &
                           psi0_order=psi0_order)
      !
      ! get max nbr of states per center
      max_states = 0
      DO ispin = 1, nspins
         DO icenter = 1, nbr_center(ispin)
            max_states = MAX(max_states, center_list(ispin)%array(1, icenter + 1)&
                 &                     - center_list(ispin)%array(1, icenter))
         END DO
      END DO
      !
      ! Allocate sparse matrices for dipole, quadrupole and their derivatives => 9x3
      ! Remember the derivatives are antisymmetric
      CALL dbcsr_allocate_matrix_set(op_mom_ao, 9)
      CALL dbcsr_allocate_matrix_set(op_mom_der_ao, 9, 3)
      !
      ! prepare for allocation
      natom = SIZE(particle_set, 1)
      ALLOCATE (first_sgf(natom))
      ALLOCATE (last_sgf(natom))
      CALL get_particle_set(particle_set, qs_kind_set, &
                            first_sgf=first_sgf, &
                            last_sgf=last_sgf)
      ALLOCATE (row_blk_sizes(natom))
      CALL dbcsr_convert_offsets_to_sizes(first_sgf, row_blk_sizes, last_sgf)
      DEALLOCATE (first_sgf)
      DEALLOCATE (last_sgf)
      !
      !
      ALLOCATE (op_mom_ao(1)%matrix)
      CALL dbcsr_create(matrix=op_mom_ao(1)%matrix, &
                        name="op_mom", &
                        dist=dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, &
                        row_blk_size=row_blk_sizes, col_blk_size=row_blk_sizes, &
                        mutable_work=.TRUE.)
      CALL cp_dbcsr_alloc_block_from_nbl(op_mom_ao(1)%matrix, sab_all)

      DO idir2 = 1, 3
         ALLOCATE (op_mom_der_ao(1, idir2)%matrix)
         CALL dbcsr_copy(op_mom_der_ao(1, idir2)%matrix, op_mom_ao(1)%matrix, &
                         "op_mom_der_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir2))))
      END DO

      DO idir = 2, SIZE(op_mom_ao, 1)
         ALLOCATE (op_mom_ao(idir)%matrix)
         CALL dbcsr_copy(op_mom_ao(idir)%matrix, op_mom_ao(1)%matrix, &
                         "op_mom_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir))))
         DO idir2 = 1, 3
            ALLOCATE (op_mom_der_ao(idir, idir2)%matrix)
            CALL dbcsr_copy(op_mom_der_ao(idir, idir2)%matrix, op_mom_ao(1)%matrix, &
                            "op_mom_der_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir*idir2))))
         END DO
      END DO
      !
      CALL dbcsr_allocate_matrix_set(op_p_ao, 3)
      ALLOCATE (op_p_ao(1)%matrix)
      CALL dbcsr_create(matrix=op_p_ao(1)%matrix, &
                        name="op_p_ao", &
                        dist=dbcsr_dist, matrix_type=dbcsr_type_antisymmetric, &
                        row_blk_size=row_blk_sizes, col_blk_size=row_blk_sizes, &
                        mutable_work=.TRUE.)
      CALL cp_dbcsr_alloc_block_from_nbl(op_p_ao(1)%matrix, sab_orb)

      DO idir = 2, 3
         ALLOCATE (op_p_ao(idir)%matrix)
         CALL dbcsr_copy(op_p_ao(idir)%matrix, op_p_ao(1)%matrix, &
                         "op_p_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir))))
      END DO
      !
      !
      DEALLOCATE (row_blk_sizes)
      !
      !
      ! Allocate full matrices for only one vector
      mo_coeff => psi0_order(1)
      NULLIFY (tmp_fm_struct)
      CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, &
                               ncol_global=max_states, para_env=para_env, &
                               context=mo_coeff%matrix_struct%context)
      CALL cp_fm_create(psi0, tmp_fm_struct)
      CALL cp_fm_create(psi_D, tmp_fm_struct)
      CALL cp_fm_create(psi_rxp, tmp_fm_struct)
      CALL cp_fm_create(psi_p1, tmp_fm_struct)
      CALL cp_fm_create(psi_p2, tmp_fm_struct)
      CALL cp_fm_struct_release(tmp_fm_struct)
      !
      CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, &
                               ncol_global=max_states, para_env=para_env, &
                               context=mo_coeff%matrix_struct%context)
      DO idir = 1, 3
         CALL cp_fm_create(p_rxp(idir), tmp_fm_struct, set_zero=.TRUE.)
         CALL cp_fm_create(r_p1(idir), tmp_fm_struct, set_zero=.TRUE.)
         CALL cp_fm_create(r_p2(idir), tmp_fm_struct, set_zero=.TRUE.)
         DO idir2 = 1, 9
            CALL cp_fm_create(rr_rxp(idir2, idir), tmp_fm_struct, set_zero=.TRUE.)
            CALL cp_fm_create(rr_p1(idir2, idir), tmp_fm_struct, set_zero=.TRUE.)
            CALL cp_fm_create(rr_p2(idir2, idir), tmp_fm_struct, set_zero=.TRUE.)
         END DO
      END DO
      CALL cp_fm_struct_release(tmp_fm_struct)
      !
      !
      !
      ! recompute the linear momentum matrices
      CALL build_lin_mom_matrix(qs_env, op_p_ao)
      !CALL p_xyz_ao(op_p_ao,qs_env,minimum_image=.FALSE.)
      !
      !
      ! get iiB and iiiB
      CALL set_vecp(iB, iiB, iiiB)
      DO ispin = 1, nspins
         !
         ! get ground state MOS
         nmo = nstates(ispin)
         mo_coeff => psi0_order(ispin)
         CALL get_mo_set(mo_set=mos(ispin), maxocc=maxocc)
         !
         ! Initialize the temporary vector chi
         chi = 0.0_dp
         int_current = 0.0_dp
         !
         ! Start loop over the occupied  states
         DO icenter = 1, nbr_center(ispin)
            !
            ! Get the Wannier center of the istate-th ground state orbital
            dk(1:3) = centers_set(ispin)%array(1:3, icenter)
            !
            ! Compute the multipole integrals for the state istate,
            ! using as reference center the corresponding Wannier center
            DO idir = 1, 9
               CALL dbcsr_set(op_mom_ao(idir)%matrix, 0.0_dp)
               DO idir2 = 1, 3
                  CALL dbcsr_set(op_mom_der_ao(idir, idir2)%matrix, 0.0_dp)
               END DO
            END DO
            CALL rRc_xyz_der_ao(op_mom_ao, op_mom_der_ao, qs_env, dk, order=2, &
                                minimum_image=.FALSE., soft=gapw)
            !
            ! collecte the states that belong to a given center
            CALL cp_fm_set_all(psi0, 0.0_dp)
            CALL cp_fm_set_all(psi_rxp, 0.0_dp)
            CALL cp_fm_set_all(psi_D, 0.0_dp)
            CALL cp_fm_set_all(psi_p1, 0.0_dp)
            CALL cp_fm_set_all(psi_p2, 0.0_dp)
            nstate_loc = center_list(ispin)%array(1, icenter + 1) - center_list(ispin)%array(1, icenter)
            jstate = 1
            DO j = center_list(ispin)%array(1, icenter), center_list(ispin)%array(1, icenter + 1) - 1
               istate = center_list(ispin)%array(2, j)
               !
               ! block the states that belong to this center
               CALL cp_fm_to_fm(mo_coeff, psi0, 1, istate, jstate)
               !
               CALL cp_fm_to_fm(psi1_rxp(ispin, iB), psi_rxp, 1, istate, jstate)
               IF (current_env%full) CALL cp_fm_to_fm(psi1_D(ispin, iB), psi_D, 1, istate, jstate)
               !
               ! psi1_p_iiB_istate and psi1_p_iiiB_istate
               CALL cp_fm_to_fm(psi1_p(ispin, iiB), psi_p1, 1, istate, jstate)
               CALL cp_fm_to_fm(psi1_p(ispin, iiiB), psi_p2, 1, istate, jstate)
               !
               jstate = jstate + 1
            END DO ! istate
            !
            ! scale the ordered mos
            IF (current_env%full) CALL cp_fm_scale_and_add(1.0_dp, psi_rxp, -1.0_dp, psi_D)
            !
            DO idir = 1, 3
               CALL set_vecp(idir, ii, iii)
               CALL cp_dbcsr_sm_fm_multiply(op_p_ao(idir)%matrix, psi_rxp, &
                                            p_rxp(idir), ncol=nstate_loc, alpha=1.e0_dp)
               IF (iiiB == iii .OR. iiiB == ii) THEN
                  CALL cp_dbcsr_sm_fm_multiply(op_mom_ao(idir)%matrix, psi_p1, &
                                               r_p1(idir), ncol=nstate_loc, alpha=1.e0_dp)
               END IF
               IF (iiB == iii .OR. iiB == ii) THEN
                  CALL cp_dbcsr_sm_fm_multiply(op_mom_ao(idir)%matrix, psi_p2, &
                                               r_p2(idir), ncol=nstate_loc, alpha=1.e0_dp)
               END IF
               DO idir2 = 1, 9
                  IF (idir2 == ii .OR. idir2 == iii) THEN
                     CALL cp_dbcsr_sm_fm_multiply(op_mom_der_ao(idir2, idir)%matrix, psi_rxp, &
                                                  rr_rxp(idir2, idir), ncol=nstate_loc, alpha=1.e0_dp)
                  END IF
                  !
                  IF (idir2 == ind_m2(ii, iiiB) .OR. idir2 == ind_m2(iii, iiiB) .OR. idir2 == iiiB) THEN
                     CALL cp_dbcsr_sm_fm_multiply(op_mom_der_ao(idir2, idir)%matrix, psi_p1, &
                                                  rr_p1(idir2, idir), ncol=nstate_loc, alpha=1.e0_dp)
                  END IF
                  !
                  IF (idir2 == ind_m2(ii, iiB) .OR. idir2 == ind_m2(iii, iiB) .OR. idir2 == iiB) THEN
                     CALL cp_dbcsr_sm_fm_multiply(op_mom_der_ao(idir2, idir)%matrix, psi_p2, &
                                                  rr_p2(idir2, idir), ncol=nstate_loc, alpha=1.e0_dp)
                  END IF
               END DO
            END DO
            !
            ! Multuply left and right by the appropriate coefficients and sum into the
            ! correct component of the chi tensor using the appropriate multiplicative factor
            ! (don't forget the occupation number)
            ! Loop over the cartesian components of the tensor
            ! The loop over the components of the external field is external, thereby
            ! only one column of the chi tensor is computed here
            DO idir = 1, 3
               chi_tmp = 0.0_dp
               int_current_tmp = 0.0_dp
               !
               ! get ii and iii
               CALL set_vecp(idir, ii, iii)
               !
               ! term: 2[C0| (r-dk)_ii |d_iii(C1(rxp-D))]-2[C0| (r-dk)_iii |d_ii(C1(rxp-D))]
               ! the factor 2 should be already included in the matrix elements
               contrib = 0.0_dp
               CALL cp_fm_trace(psi0, rr_rxp(ii, iii), contrib)
               chi_tmp = chi_tmp + 2.0_dp*contrib
               !
               contrib = 0.0_dp
               CALL cp_fm_trace(psi0, rr_rxp(iii, ii), contrib)
               chi_tmp = chi_tmp - 2.0_dp*contrib
               !
               ! correction: dk_ii*2[C0| d_iii(C1(rxp-D))] - dk_iii*2[C0| d_ii(C1(rxp-D))]
               ! factor 2 not included in the matrix elements
               contrib = 0.0_dp
               CALL cp_fm_trace(psi0, p_rxp(iii), contrib)
               IF (.NOT. chi_pbc) chi_tmp = chi_tmp + 2.0_dp*dk(ii)*contrib
               int_current_tmp = int_current_tmp + 2.0_dp*contrib
               !
               contrib2 = 0.0_dp
               CALL cp_fm_trace(psi0, p_rxp(ii), contrib2)
               IF (.NOT. chi_pbc) chi_tmp = chi_tmp - 2.0_dp*dk(iii)*contrib2
               !
               ! term: -2[C0| (r-dk)_ii  (r-dk)_iiB | d_iii(C1(piiiB))] \
               !       +2[C0| (r-dk)_iii (r-dk)_iiB | d_ii(C1(piiiB))]
               ! the factor 2 should be already included in the matrix elements
               contrib = 0.0_dp
               idir2 = ind_m2(ii, iiB)
               CALL cp_fm_trace(psi0, rr_p2(idir2, iii), contrib)
               chi_tmp = chi_tmp - 2.0_dp*contrib
               contrib2 = 0.0_dp
               IF (iiB == iii) THEN
                  CALL cp_fm_trace(psi0, r_p2(ii), contrib2)
                  chi_tmp = chi_tmp - contrib2
               END IF
               !
               contrib = 0.0_dp
               idir2 = ind_m2(iii, iiB)
               CALL cp_fm_trace(psi0, rr_p2(idir2, ii), contrib)
               chi_tmp = chi_tmp + 2.0_dp*contrib
               contrib2 = 0.0_dp
               IF (iiB == ii) THEN
                  CALL cp_fm_trace(psi0, r_p2(iii), contrib2)
                  chi_tmp = chi_tmp + contrib2
               END IF
               !
               ! correction: -dk_ii * 2[C0|(r-dk)_iiB | d_iii(C1(piiiB))] \
               !             +dk_iii * 2[C0|(r-dk)_iiB | d_ii(C1(piiiB))]
               ! the factor 2 should be already included in the matrix elements
               ! no additional correction terms because of the orthogonality between C0 and C1
               contrib = 0.0_dp
               CALL cp_fm_trace(psi0, rr_p2(iiB, iii), contrib)
               IF (.NOT. chi_pbc) chi_tmp = chi_tmp - 2.0_dp*dk(ii)*contrib
               int_current_tmp = int_current_tmp - 2.0_dp*contrib
               !
               contrib2 = 0.0_dp
               CALL cp_fm_trace(psi0, rr_p2(iiB, ii), contrib2)
               IF (.NOT. chi_pbc) chi_tmp = chi_tmp + 2.0_dp*dk(iii)*contrib2
               !
               ! term: +2[C0| (r-dk)_ii  (r-dk)_iiiB | d_iii(C1(piiB))] \
               !       -2[C0| (r-dk)_iii (r-dk)_iiiB | d_ii(C1(piiB))]
               ! the factor 2 should be already included in the matrix elements
               contrib = 0.0_dp
               idir2 = ind_m2(ii, iiiB)
               CALL cp_fm_trace(psi0, rr_p1(idir2, iii), contrib)
               chi_tmp = chi_tmp + 2.0_dp*contrib
               contrib2 = 0.0_dp
               IF (iiiB == iii) THEN
                  CALL cp_fm_trace(psi0, r_p1(ii), contrib2)
                  chi_tmp = chi_tmp + contrib2
               END IF
               !
               contrib = 0.0_dp
               idir2 = ind_m2(iii, iiiB)
               CALL cp_fm_trace(psi0, rr_p1(idir2, ii), contrib)
               chi_tmp = chi_tmp - 2.0_dp*contrib
               contrib2 = 0.0_dp
               IF (iiiB == ii) THEN
                  CALL cp_fm_trace(psi0, r_p1(iii), contrib2)
                  chi_tmp = chi_tmp - contrib2
               END IF
               !
               ! correction: +dk_ii * 2[C0|(r-dk)_iiiB | d_iii(C1(piiB))] +\
               !             -dk_iii * 2[C0|(r-dk)_iiiB | d_ii(C1(piiB))]
               ! the factor 2 should be already included in the matrix elements
               contrib = 0.0_dp
               CALL cp_fm_trace(psi0, rr_p1(iiiB, iii), contrib)
               IF (.NOT. chi_pbc) chi_tmp = chi_tmp + 2.0_dp*dk(ii)*contrib
               int_current_tmp = int_current_tmp + 2.0_dp*contrib
               !
               contrib2 = 0.0_dp
               CALL cp_fm_trace(psi0, rr_p1(iiiB, ii), contrib2)
               IF (.NOT. chi_pbc) chi_tmp = chi_tmp - 2.0_dp*dk(iii)*contrib2
               !
               ! accumulate
               chi(idir) = chi(idir) + maxocc*chi_tmp
               int_current(iii) = int_current(iii) + int_current_tmp
            END DO ! idir

         END DO ! icenter
         !
         DO idir = 1, 3
            current_env%chi_tensor(idir, iB, ispin) = current_env%chi_tensor(idir, iB, ispin) + &
                                                      chi(idir)
            IF (output_unit > 0) THEN
               !WRITE(output_unit,'(A,E12.6)') ' chi_'//ACHAR(119+idir)//ACHAR(119+iB)//&
               !     &                         ' = ',chi(idir)
               !WRITE(output_unit,'(A,E12.6)') ' analytic \int j_'//ACHAR(119+idir)//ACHAR(119+iB)//&
               !     &                         '(r) d^3r = ',int_current(idir)
            END IF
         END DO
         !
      END DO ! ispin
      !
      ! deallocate the sparse matrices
      CALL dbcsr_deallocate_matrix_set(op_mom_ao)
      CALL dbcsr_deallocate_matrix_set(op_mom_der_ao)
      CALL dbcsr_deallocate_matrix_set(op_p_ao)

      CALL cp_fm_release(psi0)
      CALL cp_fm_release(psi_rxp)
      CALL cp_fm_release(psi_D)
      CALL cp_fm_release(psi_p1)
      CALL cp_fm_release(psi_p2)
      DO idir = 1, 3
         CALL cp_fm_release(p_rxp(idir))
         CALL cp_fm_release(r_p1(idir))
         CALL cp_fm_release(r_p2(idir))
         DO idir2 = 1, 9
            CALL cp_fm_release(rr_rxp(idir2, idir))
            CALL cp_fm_release(rr_p1(idir2, idir))
            CALL cp_fm_release(rr_p2(idir2, idir))
         END DO
      END DO

      CALL timestop(handle)

   END SUBROUTINE current_build_chi_many_centers

! **************************************************************************************************
!> \brief ...
!> \param current_env ...
!> \param qs_env ...
!> \param iB ...
! **************************************************************************************************
   SUBROUTINE current_build_chi_one_center(current_env, qs_env, iB)
      !
      TYPE(current_env_type)                             :: current_env
      TYPE(qs_environment_type), POINTER                 :: qs_env
      INTEGER, INTENT(IN)                                :: iB

      CHARACTER(LEN=*), PARAMETER :: routineN = 'current_build_chi_one_center'

      INTEGER :: handle, idir, idir2, iiB, iiiB, ispin, jdir, jjdir, kdir, max_states, nao, natom, &
         nbr_center(2), nmo, nspins, nstates(2), output_unit
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf
      INTEGER, DIMENSION(:), POINTER                     :: row_blk_sizes
      LOGICAL                                            :: chi_pbc, gapw
      REAL(dp)                                           :: chi(3), contrib, dk(3), int_current(3), &
                                                            maxocc
      TYPE(cell_type), POINTER                           :: cell
      TYPE(cp_2d_i_p_type), DIMENSION(:), POINTER        :: center_list
      TYPE(cp_2d_r_p_type), DIMENSION(:), POINTER        :: centers_set
      TYPE(cp_fm_type)                                   :: buf
      TYPE(cp_fm_type), DIMENSION(:), POINTER            :: psi0_order
      TYPE(cp_fm_type), DIMENSION(:, :), POINTER         :: psi1_p, psi1_rxp
      TYPE(cp_fm_type), POINTER                          :: mo_coeff
      TYPE(cp_logger_type), POINTER                      :: logger
      TYPE(dbcsr_distribution_type), POINTER             :: dbcsr_dist
      TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: op_mom_ao, op_p_ao
      TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_mom_der_ao
      TYPE(dft_control_type), POINTER                    :: dft_control
      TYPE(mo_set_type), DIMENSION(:), POINTER           :: mos
      TYPE(mp_para_env_type), POINTER                    :: para_env
      TYPE(neighbor_list_set_p_type), DIMENSION(:), &
         POINTER                                         :: sab_all, sab_orb
      TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
      TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set

!

      CALL timeset(routineN, handle)
      !
      NULLIFY (dft_control, mos, para_env, mo_coeff, op_mom_ao, &
               op_mom_der_ao, center_list, centers_set, &
               op_p_ao, psi1_p, psi1_rxp, cell, psi0_order)

      logger => cp_get_default_logger()
      output_unit = cp_logger_get_default_io_unit(logger)

      CALL get_qs_env(qs_env=qs_env, &
                      dft_control=dft_control, &
                      mos=mos, &
                      para_env=para_env, &
                      cell=cell, &
                      particle_set=particle_set, &
                      qs_kind_set=qs_kind_set, &
                      sab_all=sab_all, &
                      sab_orb=sab_orb, &
                      dbcsr_dist=dbcsr_dist)

      nspins = dft_control%nspins
      gapw = dft_control%qs_control%gapw

      CALL get_current_env(current_env=current_env, &
                           chi_pbc=chi_pbc, &
                           nao=nao, &
                           nbr_center=nbr_center, &
                           center_list=center_list, &
                           centers_set=centers_set, &
                           psi1_p=psi1_p, &
                           psi1_rxp=psi1_rxp, &
                           nstates=nstates, &
                           psi0_order=psi0_order)
      !
      max_states = MAXVAL(nstates(1:nspins))
      !
      ! Allocate sparse matrices for dipole, quadrupole and their derivatives => 9x3
      ! Remember the derivatives are antisymmetric
      CALL dbcsr_allocate_matrix_set(op_mom_ao, 9)
      CALL dbcsr_allocate_matrix_set(op_mom_der_ao, 9, 3)
      !
      ! prepare for allocation
      natom = SIZE(particle_set, 1)
      ALLOCATE (first_sgf(natom))
      ALLOCATE (last_sgf(natom))
      CALL get_particle_set(particle_set, qs_kind_set, &
                            first_sgf=first_sgf, &
                            last_sgf=last_sgf)
      ALLOCATE (row_blk_sizes(natom))
      CALL dbcsr_convert_offsets_to_sizes(first_sgf, row_blk_sizes, last_sgf)
      DEALLOCATE (first_sgf)
      DEALLOCATE (last_sgf)
      !
      !
      ALLOCATE (op_mom_ao(1)%matrix)
      CALL dbcsr_create(matrix=op_mom_ao(1)%matrix, &
                        name="op_mom", &
                        dist=dbcsr_dist, matrix_type=dbcsr_type_no_symmetry, &
                        row_blk_size=row_blk_sizes, col_blk_size=row_blk_sizes, &
                        mutable_work=.TRUE.)
      CALL cp_dbcsr_alloc_block_from_nbl(op_mom_ao(1)%matrix, sab_all)

      DO idir2 = 1, 3
         ALLOCATE (op_mom_der_ao(1, idir2)%matrix)
         CALL dbcsr_copy(op_mom_der_ao(1, idir2)%matrix, op_mom_ao(1)%matrix, &
                         "op_mom_der_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir2))))
      END DO

      DO idir = 2, SIZE(op_mom_ao, 1)
         ALLOCATE (op_mom_ao(idir)%matrix)
         CALL dbcsr_copy(op_mom_ao(idir)%matrix, op_mom_ao(1)%matrix, &
                         "op_mom_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir))))
         DO idir2 = 1, 3
            ALLOCATE (op_mom_der_ao(idir, idir2)%matrix)
            CALL dbcsr_copy(op_mom_der_ao(idir, idir2)%matrix, op_mom_ao(1)%matrix, &
                            "op_mom_der_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir*idir2))))
         END DO
      END DO
      !
      CALL dbcsr_allocate_matrix_set(op_p_ao, 3)
      ALLOCATE (op_p_ao(1)%matrix)
      CALL dbcsr_create(matrix=op_p_ao(1)%matrix, &
                        name="op_p_ao", &
                        dist=dbcsr_dist, matrix_type=dbcsr_type_antisymmetric, &
                        row_blk_size=row_blk_sizes, col_blk_size=row_blk_sizes, &
                        mutable_work=.TRUE.)
      CALL cp_dbcsr_alloc_block_from_nbl(op_p_ao(1)%matrix, sab_orb)

      DO idir = 2, 3
         ALLOCATE (op_p_ao(idir)%matrix)
         CALL dbcsr_copy(op_p_ao(idir)%matrix, op_p_ao(1)%matrix, &
                         "op_p_ao"//"-"//TRIM(ADJUSTL(cp_to_string(idir))))
      END DO
      !
      !
      DEALLOCATE (row_blk_sizes)
      !
      ! recompute the linear momentum matrices
      CALL build_lin_mom_matrix(qs_env, op_p_ao)
      !CALL p_xyz_ao(op_p_ao,qs_env,minimum_image=.FALSE.)
      !
      !
      ! get iiB and iiiB
      CALL set_vecp(iB, iiB, iiiB)
      DO ispin = 1, nspins
         !
         CPASSERT(nbr_center(ispin) == 1)
         !
         ! get ground state MOS
         nmo = nstates(ispin)
         mo_coeff => psi0_order(ispin)
         CALL get_mo_set(mo_set=mos(ispin), maxocc=maxocc)
         !
         ! Create buffer matrix
         CALL cp_fm_create(buf, mo_coeff%matrix_struct)
         !
         ! Initialize the temporary vector chi
         chi = 0.0_dp
         int_current = 0.0_dp
         !
         !
         ! Get the Wannier center of the istate-th ground state orbital
         dk(1:3) = centers_set(ispin)%array(1:3, 1)
         !
         ! Compute the multipole integrals for the state istate,
         ! using as reference center the corresponding Wannier center
         DO idir = 1, 9
            CALL dbcsr_set(op_mom_ao(idir)%matrix, 0.0_dp)
            DO idir2 = 1, 3
               CALL dbcsr_set(op_mom_der_ao(idir, idir2)%matrix, 0.0_dp)
            END DO
         END DO
         CALL rRc_xyz_der_ao(op_mom_ao, op_mom_der_ao, qs_env, dk, order=2, &
                             minimum_image=.FALSE., soft=gapw)
         !
         !
         ! Multuply left and right by the appropriate coefficients and sum into the
         ! correct component of the chi tensor using the appropriate multiplicative factor
         ! (don't forget the occupation number)
         ! Loop over the cartesian components of the tensor
         ! The loop over the components of the external field is external, thereby
         ! only one column of the chi tensor is computed here
         DO idir = 1, 3
            !
            !
            !
            ! term: dk_ii*2[C0| d_iii(C1(rxp-D))] - dk_iii*2[C0| d_ii(C1(rxp-D))]
            IF (.NOT. chi_pbc) THEN
               CALL cp_dbcsr_sm_fm_multiply(op_p_ao(idir)%matrix, mo_coeff, &
                                            buf, ncol=nmo, alpha=1.e0_dp)
               DO jdir = 1, 3
                  DO kdir = 1, 3
                     IF (Levi_Civita(kdir, jdir, idir) == 0.0_dp) CYCLE
                     CALL cp_fm_trace(buf, psi1_rxp(ispin, iB), contrib)
                     chi(kdir) = chi(kdir) - Levi_Civita(kdir, jdir, idir)*2.0_dp*dk(jdir)*contrib
                  END DO
               END DO
            END IF
            !
            !
            !
            ! term: 2[C0| (r-dk)_ii |d_iii(C1(rxp-D))]-2[C0| (r-dk)_iii |d_ii(C1(rxp-D))]
            ! and
            ! term: -dk_ii * 2[C0|(r-dk)_iiB | d_iii(C1(piiiB))] +
            !       +dk_iii * 2[C0|(r-dk)_iiB | d_ii(C1(piiiB))]
            ! and
            ! term: +dk_ii * 2[C0|(r-dk)_iiiB | d_iii(C1(piiB))] +
            !       -dk_iii * 2[C0|(r-dk)_iiiB | d_ii(C1(piiB))]
            DO jdir = 1, 3
               CALL cp_dbcsr_sm_fm_multiply(op_mom_der_ao(jdir, idir)%matrix, mo_coeff, &
                                            buf, ncol=nmo, alpha=1.e0_dp)
               DO kdir = 1, 3
                  IF (Levi_Civita(kdir, jdir, idir) == 0.0_dp) CYCLE
                  CALL cp_fm_trace(buf, psi1_rxp(ispin, iB), contrib)
                  chi(kdir) = chi(kdir) - Levi_Civita(kdir, jdir, idir)*2.0_dp*contrib
               END DO
               !
               IF (.NOT. chi_pbc) THEN
                  IF (jdir == iiB) THEN
                     DO jjdir = 1, 3
                        DO kdir = 1, 3
                           IF (Levi_Civita(kdir, jjdir, idir) == 0.0_dp) CYCLE
                           CALL cp_fm_trace(buf, psi1_p(ispin, iiiB), contrib)
                           chi(kdir) = chi(kdir) + Levi_Civita(kdir, jjdir, idir)*2.0_dp*dk(jjdir)*contrib
                        END DO
                     END DO
                  END IF
                  !
                  IF (jdir == iiiB) THEN
                     DO jjdir = 1, 3
                        DO kdir = 1, 3
                           IF (Levi_Civita(kdir, jjdir, idir) == 0.0_dp) CYCLE
                           CALL cp_fm_trace(buf, psi1_p(ispin, iiB), contrib)
                           chi(kdir) = chi(kdir) - Levi_Civita(kdir, jjdir, idir)*2.0_dp*dk(jjdir)*contrib
                        END DO
                     END DO
                  END IF
               END IF
            END DO
            !
            !
            !
            ! term1: -2[C0| (r-dk)_ii  (r-dk)_iiB | d_iii(C1(piiiB))] +
            !        +2[C0| (r-dk)_iii (r-dk)_iiB | d_ii(C1(piiiB))]
            ! and
            ! term1: +2[C0| (r-dk)_ii  (r-dk)_iiiB | d_iii(C1(piiB))] +
            !        -2[C0| (r-dk)_iii (r-dk)_iiiB | d_ii(C1(piiB))]
            ! HERE THERE IS ONE EXTRA MULTIPLY
            DO jdir = 1, 3
               CALL cp_dbcsr_sm_fm_multiply(op_mom_der_ao(ind_m2(jdir, iiB), idir)%matrix, mo_coeff, &
                                            buf, ncol=nmo, alpha=1.e0_dp)
               DO kdir = 1, 3
                  IF (Levi_Civita(kdir, jdir, idir) == 0.0_dp) CYCLE
                  CALL cp_fm_trace(buf, psi1_p(ispin, iiiB), contrib)
                  chi(kdir) = chi(kdir) + Levi_Civita(kdir, jdir, idir)*2.0_dp*contrib
               END DO
               !
               CALL cp_dbcsr_sm_fm_multiply(op_mom_der_ao(ind_m2(jdir, iiiB), idir)%matrix, mo_coeff, &
                                            buf, ncol=nmo, alpha=1.e0_dp)
               DO kdir = 1, 3
                  IF (Levi_Civita(kdir, jdir, idir) == 0.0_dp) CYCLE
                  CALL cp_fm_trace(buf, psi1_p(ispin, iiB), contrib)
                  chi(kdir) = chi(kdir) - Levi_Civita(kdir, jdir, idir)*2.0_dp*contrib
               END DO
            END DO
            !
            !
            !
            ! term2: -2[C0| (r-dk)_ii  (r-dk)_iiB | d_iii(C1(piiiB))] +
            !        +2[C0| (r-dk)_iii (r-dk)_iiB | d_ii(C1(piiiB))]
            ! and
            ! term2: +2[C0| (r-dk)_ii  (r-dk)_iiiB | d_iii(C1(piiB))] +
            !        -2[C0| (r-dk)_iii (r-dk)_iiiB | d_ii(C1(piiB))]
            CALL cp_dbcsr_sm_fm_multiply(op_mom_ao(idir)%matrix, mo_coeff, &
                                         buf, ncol=nmo, alpha=1.e0_dp)
            DO jdir = 1, 3
               DO kdir = 1, 3
                  IF (Levi_Civita(kdir, idir, jdir) == 0.0_dp) CYCLE
                  IF (iiB == jdir) THEN
                     CALL cp_fm_trace(buf, psi1_p(ispin, iiiB), contrib)
                     chi(kdir) = chi(kdir) + Levi_Civita(kdir, idir, jdir)*contrib
                  END IF
               END DO
            END DO
            !
            DO jdir = 1, 3
               DO kdir = 1, 3
                  IF (Levi_Civita(kdir, idir, jdir) == 0.0_dp) CYCLE
                  IF (iiiB == jdir) THEN
                     CALL cp_fm_trace(buf, psi1_p(ispin, iiB), contrib)
                     chi(kdir) = chi(kdir) - Levi_Civita(kdir, idir, jdir)*contrib
                  END IF
                  !
               END DO
            END DO
            !
            !
            !
            !
         END DO ! idir
         !
         DO idir = 1, 3
            current_env%chi_tensor(idir, iB, ispin) = current_env%chi_tensor(idir, iB, ispin) + &
                                                      maxocc*chi(idir)
            IF (output_unit > 0) THEN
               !WRITE(output_unit,'(A,E12.6)') ' chi_'//ACHAR(119+idir)//ACHAR(119+iB)//&
               !     &                         ' = ',maxocc * chi(idir)
            END IF
         END DO
         !
         CALL cp_fm_release(buf)
      END DO ! ispin
      !
      ! deallocate the sparse matrices
      CALL dbcsr_deallocate_matrix_set(op_mom_ao)
      CALL dbcsr_deallocate_matrix_set(op_mom_der_ao)
      CALL dbcsr_deallocate_matrix_set(op_p_ao)

      CALL timestop(handle)

   END SUBROUTINE current_build_chi_one_center

END MODULE qs_linres_current
